#!/usr/local/bin/php
<?php
# Copy data from vqWiki to MoinMoin wiki
# Jeff Olson - October 20, 2005
# Based on "mediawiki2moin.php" from http://moinmoin.wikiwikiweb.de/MediaWikiConverter
#
# Usage: php <this script> <vqwiki-data-dir> <moinmoin-data-dir>
#
# Every "<title>.txt" file in the vqWiki directory becomes a MoinMoin page folder
# "<moinmoin-data-dir>/pages/<fixed title>/" holding a "current" pointer file and
# a single revision "revisions/00000001" with the markup converted.
#
# NOTE(review): the opening part of this script (the input/output directory
# settings and the start of the loop that reads the topic files) was missing
# from the copy this version was rebuilt from. The two directories are
# therefore taken from the command line, and vq_read_topics() is a
# reconstruction - confirm both against an intact copy if one exists.

# Run the conversion only when this file is executed directly, so that the
# functions below can also be included by other scripts without side effects.
if (isset($_SERVER['SCRIPT_FILENAME'])
    && realpath($_SERVER['SCRIPT_FILENAME']) === realpath(__FILE__)
) {
    exit(vqwiki2moin_main(isset($_SERVER['argv']) ? $_SERVER['argv'] : array()));
}

# Entry point: read all vqWiki topics, then write one MoinMoin page per topic.
#
# $argv - command line: script name, vqWiki data dir, MoinMoin data dir.
# Returns the process exit status (0 on success, 1 on any fatal problem).
function vqwiki2moin_main($argv)
{
    if (count($argv) < 3) {
        $script = isset($argv[0]) ? $argv[0] : 'vqwiki2moin.php';
        vq_warn("Usage: $script <vqwiki-data-dir> <moinmoin-data-dir>");
        return 1;
    }

    # Resolve to absolute paths: change_syntax() builds attachment paths from
    # these values, which must not depend on the current working directory.
    $inputDir  = realpath($argv[1]);
    $outputDir = realpath($argv[2]);
    if ($inputDir === false || !is_dir($inputDir)) {
        vq_warn("Input directory not found: {$argv[1]}");
        return 1;
    }
    if ($outputDir === false || !is_dir("$outputDir/pages")) {
        vq_warn("MoinMoin data directory (with a 'pages' folder) not found: {$argv[2]}");
        return 1;
    }

    $topics = vq_read_topics($inputDir);
    if ($topics === false) {
        vq_warn("Cannot read input directory: $inputDir");
        return 1;
    }

    # Get historical versions - still needs some work
    # TODO: old revisions live in "$inputDir/versions" as "<title>.txt.*";
    #       they are not converted yet (date & time, ip address would have to
    #       be parsed from them).

    echo "\n\n*****\nCreating New Files\n*****\n";
    foreach ($topics as $index => $topic) {
        echo "$index: {$topic['title']}\n";

        # Fix title
        $pageTitle = fix_title($topic['title']);
        echo "\tfixed: $pageTitle\n";

        if (!vq_write_page("$outputDir/pages", $pageTitle, $topic['text'], $inputDir, $outputDir)) {
            return 1;
        }
    }

    return 0;
}

# Print a diagnostic on standard error.
function vq_warn($message)
{
    file_put_contents('php://stderr', $message . "\n");
}

# Load every "<title>.txt" topic file found directly inside $inputDir.
#
# Returns a list of array('title' => ..., 'text' => ...) entries in directory
# order, or false when the directory cannot be opened.
#
# NOTE(review): reconstructed - the ".txt" suffix is taken from the
# "$title.txt.*" glob in the original notes on historical versions, and the
# exact wording of the progress line is a guess.
function vq_read_topics($inputDir)
{
    $handle = opendir($inputDir);
    if ($handle === false) {
        return false;
    }

    $topics = array();
    while (false !== ($file = readdir($handle))) {
        if (!preg_match('/^(.+)\.txt$/', $file, $nameParts)) {
            continue;
        }
        $fullPath = "$inputDir/$file";
        if (!is_file($fullPath)) {
            continue;
        }

        # file_get_contents() returns "" for an empty file and leaves no
        # handle open (the old fopen()/fread() pair never closed its handle).
        $contents = file_get_contents($fullPath);
        if ($contents === false) {
            vq_warn("Skipping unreadable topic: $fullPath");
            continue;
        }

        echo "$file -> {$nameParts[1]}\n";
        $topics[] = array('title' => $nameParts[1], 'text' => $contents);
    }
    closedir($handle);

    return $topics;
}

# Create the MoinMoin folder for one page and write its first revision.
#
# $pagesDir  - absolute path of the MoinMoin "pages" directory
# $pageTitle - title as returned by fix_title(); used as the folder name
# $text      - raw vqWiki markup of the topic
# $inputDir, $outputDir - passed through to change_syntax() for attachments
#
# Returns true on success, false (after printing a diagnostic) on failure.
function vq_write_page($pagesDir, $pageTitle, $text, $inputDir, $outputDir)
{
    # The title becomes a folder name that is removed recursively below, so
    # refuse anything that could point outside its own page folder.
    if ($pageTitle === '' || $pageTitle === '.' || $pageTitle === '..'
        || strpos($pageTitle, '/') !== false
    ) {
        vq_warn("Refusing to use '$pageTitle' as a page folder name");
        return false;
    }

    $pageDir   = "$pagesDir/$pageTitle";
    $quotedDir = escapeshellarg($pageDir);

    # Delete existing folder for page, then remake it
    system("rm -rf $quotedDir");
    if (!mkdir($pageDir) || !mkdir("$pageDir/revisions")) {
        vq_warn("Cannot create page folder: $pageDir");
        return false;
    }

    # "current" names the active revision
    if (file_put_contents("$pageDir/current", "00000001") === false) {
        vq_warn("Cannot write $pageDir/current");
        return false;
    }

    # Copy text from old to new, fixing syntax as we go
    # - also pass in title and directories so attachments can be copied
    $lines = change_syntax(explode("\n", $text), $pageTitle, $inputDir, $outputDir);

    # Create output file: trailing whitespace (including "\r") is dropped
    $revision = '';
    foreach ($lines as $line) {
        $revision .= rtrim($line) . "\n";
    }
    if (file_put_contents("$pageDir/revisions/00000001", $revision) === false) {
        vq_warn("Cannot write $pageDir/revisions/00000001");
        return false;
    }

    # chown & chmod to set correct permissions (this assumes we're running
    # the script as root); a failure here is reported by the tools themselves
    # and does not stop the conversion.
    system("chown -R apache:apache $quotedDir");
    system("chmod -R g+w $quotedDir");
    system("chmod -R o-rx $quotedDir");

    return true;
}

# Turn a vqWiki topic name into a MoinMoin page folder name: spaces and "+"
# become "_" and the result is converted to UTF-8 exactly once (the previous
# version encoded twice, which corrupted every non-ASCII character).
#
# Assumes the incoming title is ISO-8859-1, as the original utf8_encode()
# call did - TODO confirm against the vqWiki installation.
function fix_title($title)
{
    $title = strtr($title, ' +', '__');

    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($title, 'UTF-8', 'ISO-8859-1');
    }

    # Fallback for installations without mbstring
    return utf8_encode($title);
}

# Convert the lines of one vqWiki topic to MoinMoin markup.
#
# $array     - list of lines (0-based, without line terminators)
# $pageTitle - fixed page title; attachments are copied to
#              "$outputDir/pages/$pageTitle/attachments"
# $inputDir  - vqWiki data directory; attachments are read from
#              "$inputDir/upload/jsp"
# $outputDir - MoinMoin data directory
#
# Returns the converted list of lines. A converted line may contain an
# embedded "\n" (after a code block opener, or for line breaks inside lists).
# Side effects: prints one line per attachment and copies attachment files.
function change_syntax ($array, $pageTitle, $inputDir, $outputDir)
{
    # Tag pairs for highlighted code blocks, e.g. [<java>] ... [</java>]
    $codeTags = array(
        'java' => array('start' => '/\[<java>\]/', 'end' => '/\[<\/java>\]/'),
        'html' => array('start' => '/\[<html>\]/', 'end' => '/\[<\/html>\]/'),
    );

    # Which multiple-line block we are inside: '' (none), 'pre', 'code',
    # 'java' or 'html'. Lines inside a block are copied without conversion.
    $openBlock = '';

    $total = count($array);
    for ($i = 0; $i < $total; $i++) {
        $row = $array[$i];

        if ($openBlock !== '') {
            if ($openBlock === 'pre') {
                # preformatted text ends at the first blank line
                if (preg_match('/^\s*$/', $row)) {
                    $array[$i] = '}}}';
                    $openBlock = '';
                }
            } elseif ($openBlock === 'code') {
                # {{{ ... }}} is already valid MoinMoin; just find the end
                if (preg_match('/}}}/', $row)) {
                    $openBlock = '';
                }
            } else {
                $endTag = $codeTags[$openBlock]['end'];
                if (preg_match($endTag, $row)) {
                    $array[$i] = preg_replace($endTag, '}}}', $row);
                    $openBlock = '';
                }
            }
            # no other substitutions on lines that belong to a block
            continue;
        }

        # Preformatted text - @@@@ on a line by itself, ended by a blank line
        if (preg_match('/^@@@@\s*$/', $row)) {
            $array[$i] = preg_replace('/^\s*@@@@\s*$/', '{{{', $row);
            $openBlock = 'pre';
            continue;
        }

        # Code - {{{ xxx }}} - may be on same or different lines
        if (preg_match('/{{{/', $row)) {
            if (!preg_match('/}}}/', $row)) {
                $openBlock = 'code';
            }
            continue;
        }

        # Java / HTML code - [<java>] xxx [</java>] - same or different lines
        foreach ($codeTags as $language => $tags) {
            if (!preg_match($tags['start'], $row)) {
                continue;
            }
            $opener = '{{{#!' . $language;
            $row = preg_replace($tags['start'], $opener, $row);
            if (preg_match($tags['end'], $row)) {
                $row = preg_replace($tags['end'], '}}}', $row);
            } else {
                $openBlock = $language;
            }
            # the parser line must stand alone, so break the line after it
            $array[$i] = str_replace($opener, $opener . "\n", $row);
            continue 2; # next input line
        }

        $row = vq_convert_markup($row);
        $row = vq_convert_attachments($row, $pageTitle, $inputDir, $outputDir);

        # vqWiki keeps single newlines, MoinMoin joins adjacent lines into
        # one paragraph: add an explicit break where the layout would change.
        # The next row is still unconverted here, hence the tab-based test.
        if ($i + 1 < $total && vq_needs_line_break($row, $array[$i + 1])) {
            # include space before to prevent "Java:[[BR]]" making an
            # Interwiki link, among other things
            $row .= ' [[BR]]';
        }

        $array[$i] = $row;
    }

    return $array;
}

# Apply the single-line markup conversions to one row. The order of the
# steps matters; see the notes on the individual steps.
function vq_convert_markup($row)
{
    # Tables
    $row = preg_replace('/####/', '', $row);                 # don't need these
    $row = preg_replace('/^([^#]+)##/', '||$1||', $row, 1);  # add 1st column start marker
    $row = preg_replace('/##/', '||', $row);                 # all other markers

    # Backtick links: `link` => ["link"] - must come before 'No formatting code'
    $row = preg_replace('/`([^`]+)`/', '["$1"]', $row);

    # C2 links
    $row = preg_replace('/c2:/', 'wiki:Wiki:', $row);

    # No formatting code (__) - must come before underline conversion step
    $row = preg_replace('/__([^_]+)__/', '`$1`', $row);

    # Underline: ===text=== => __text__ (must come before headings)
    $row = preg_replace('/===([^=]+)===/', '__$1__', $row);

    # Headings
    $row = preg_replace('/!!!([^!]+)!!!/', '= $1 =', $row);  # heading level 1
    $row = preg_replace('/!!([^!]+)!!/', '== $1 ==', $row);  # heading level 2
    $row = preg_replace('/!([^!]+)!/', '=== $1 ===', $row);  # heading level 3

    # Bulleted (tab + *) and numbered (tab + #) lists: MoinMoin nests lists
    # by indentation, so use one space per leading tab, at any depth.
    # NOTE(review): the previous replacements produced the same one-space
    # indent for depths 1-3, which flattened nested lists; presumably the
    # differing indents were lost when the file's whitespace was collapsed.
    if (preg_match('/^(\t+)([*#])/', $row, $listParts)) {
        $marker = ($listParts[2] === '*') ? '* ' : '1. ';
        $row = str_repeat(' ', strlen($listParts[1])) . $marker
             . substr($row, strlen($listParts[0]));
    }

    # Line breaks inside lists: continue on a new line with the item's indent
    if (preg_match('/^(\s*)(1\.|\*)(.*)@@/', $row, $itemParts)) {
        $row = str_replace('@@', "\n" . $itemParts[1], $row);
    }

    # Other line breaks - appearing anywhere else
    $row = str_replace('@@', ' [[BR]] ', $row);

    # Horizontal rules - no conversion necessary
    # Bold/italic - no conversion necessary
    return $row;
}

# Rewrite "attach:name" to "attachment:name" and copy every referenced file
# from "$inputDir/upload/jsp" to the page's "attachments" folder.
#
# Does not handle attachments in this format: attach:"File name with spaces"
# -- fix those manually. Copy problems are reported but never abort the run.
function vq_convert_attachments($row, $pageTitle, $inputDir, $outputDir)
{
    # this is not a complete filename regex, but covers the common names
    $attachmentPattern = '/attach:([\w.-]+)/';
    if (!preg_match_all($attachmentPattern, $row, $found)) {
        return $row;
    }

    $newDirectory = "$outputDir/pages/$pageTitle/attachments";
    $canCopy = is_dir($newDirectory) || mkdir($newDirectory);
    if (!$canCopy) {
        vq_warn("\tcannot create attachments directory: $newDirectory");
    }

    foreach (array_unique($found[1]) as $attachmentFilename) {
        echo "\tattachment...$attachmentFilename\n";
        if (!$canCopy) {
            continue;
        }
        $existingLocation = "$inputDir/upload/jsp/$attachmentFilename";
        if (!is_file($existingLocation)
            || !copy($existingLocation, "$newDirectory/$attachmentFilename")
        ) {
            vq_warn("\tcould not copy attachment: $existingLocation");
        }
    }

    return preg_replace($attachmentPattern, 'attachment:$1', $row);
}

# Decide whether an explicit [[BR]] is needed between a converted row and the
# (still unconverted) row that follows it. All conditions must hold.
function vq_needs_line_break($row, $nextRow)
{
    $emptyRowPattern = '/^\s*$/';

    return !preg_match($emptyRowPattern, $row)      # current row is not empty
        && !preg_match('/----/', $row)              # current row does not have horizontal rule
        && !preg_match('/=+[^=]+=+/', $row)         # current row is not a heading
        && !preg_match($emptyRowPattern, $nextRow)  # next row is not empty
        && !preg_match('/^\t+[\*\#]/', $nextRow)    # next row doesn't start with bullet or numbered item
        && !preg_match('/##/', $nextRow);           # next row doesn't contain table markup
}

# Code to fix titles that I did not need
# (kept for reference only; ereg() no longer exists in current PHP versions)
/*
$quoted = array();
$in_parenthesis = false;
for ($i = 0; $i < strlen($title[$a]); $i++) {
    $curchar = substr ($title[$a], $i, 1);
    if (ereg('[^a-zA-Z0-9_]', $curchar)) {
        if (!$in_parenthesis) {
            $quoted[] = '(';
            $in_parenthesis = true;
        }
        $quoted[] = str_pad(dechex(ord($curchar)), 2, '0', STR_PAD_LEFT);
    } else {
        if ($in_parenthesis) {
            $quoted[] = ')';
            $in_parenthesis = false;
        }
        $quoted[] = $curchar;
    }
}
if ($in_parenthesis) {
    $quoted[] = ')';
}
$title[$a] = implode('', $quoted);
unset($quoted);
*/