#!/usr/local/bin/php

# Copy data from vqWiki to MoinMoin wiki
#  Jeff Olson <jeff@olsonzoo.com> - October 20, 2005   
#  Based on "mediawiki2moin.php" from http://moinmoin.wikiwikiweb.de/MediaWikiConverter

<?php

# Set these variables.  
# - $inputDir is location of vqWiki data files
# - $outputDir is location where MoinMoin pages directory is located...
#   WARNING! Any existing pages in $outputDir will be deleted if they exist in $inputDir

$inputDir = '/home/wiki/wiki';
$outputDir = '/codeswiki/data';

echo "*****\n\nReading Existing Files\n*****\n";

# Read input data from vqWiki.
# $title and $text are parallel arrays: $title[$i] is the page name (the file
# name without its ".txt" extension) and $text[$i] is that file's raw contents.
# They are initialised up front so later code can count() them even when no
# page files are found.
$title = array();
$text = array();
$a = 0;
if ($handle = opendir($inputDir)) 
{
	echo "Directory handle: $handle\n";
	echo "Files:\n";

	/* This is the correct way to loop over the directory. */
	while (false !== ($file = readdir($handle))) 
	{
		if (fnmatch("*.txt", $file) 
			#&& fnmatch("Data+Migrations.txt", $file)  # uncomment if testing on specific files
		   ) 
		{
			$fullPath = $inputDir . "/" . $file;

			# Skip anything that merely looks like a page (e.g. a directory named "x.txt")
			if (!is_file($fullPath))
			{
				continue;
			}

			echo "$file\n";

			# Strip only the trailing extension; a ".txt" elsewhere in the name is part of the title
			$title[$a] = preg_replace('/\.txt$/', "", $file);
			#echo "\t$a -> $title[$a]\n";

			# Read the whole file in one call so no file handle is left open.
			# An empty file yields "" just like before.
			$contents = file_get_contents($fullPath);
			if ($contents === false)
			{
				echo "\tWARNING: could not read $fullPath - page will be empty\n";
				$contents = "";
			}
			$text[$a] = $contents;
			#echo $text[$a] . "\n\n";
			$a++;
		}
	}

	closedir($handle);
}
else
{
	# Nothing can be migrated without the input directory, so stop with a clear message
	die("Unable to open input directory: $inputDir\n");
}

# Get historical versions - still needs some work
#$versionsDir = "$inputDir/versions";
#chdir($versionsDir);
#for ($i = 0; $i < count($title); $i++)
#{
#	$historical[$i] = glob("$title[$i].txt.*");
#	print_r($historical[$i]);
#	echo "\t" . count($historical[$i]) . " versions found\n";
#}


# Go to output directory for MoinMoin wiki
# For every page read earlier this creates, under $outputDir/pages:
#   <page>/current              - contains the current revision number "00000001"
#   <page>/revisions/00000001   - the converted page text
# Any existing <page> directory is removed first.
echo "\n\n*****\nCreating New Files\n*****\n";
chdir($outputDir) or die("Cannot change to output directory: $outputDir\n");
chdir("pages") or die("Cannot change to 'pages' directory under $outputDir\n");

# Tolerate the case where no pages were collected at all
$count = isset($title) ? count($title) : 0;
for ($a = 0; $a < $count; $a++) 
{
	echo "$a: $title[$a]\n";
	
	# Fix title
	$title[$a] = fix_title($title[$a]);
	echo "\tfixed: $title[$a]\n";
	
	# Parse historical versions for date & time, ip address
	# TODO
	
	# Delete existing folder for page.
	# The name comes from a file name, so it is single-quoted for the shell by hand
	# (embedded ' becomes '\'') - inside double quotes, characters such as $ ` and "
	# would be interpreted by the shell. escapeshellarg() is deliberately avoided
	# because it can drop non-ASCII bytes depending on the locale, which would make
	# rm target a different directory. "--" stops a leading "-" being read as an option.
	#echo "deleting any existing folder with name $title[$a]\n";
	$quotedTitle = "'" . str_replace("'", "'\\''", $title[$a]) . "'";
	system("rm -rf -- " . $quotedTitle);

	# Remake folder for page
	#echo "trying to make $title[$a]\n";
	mkdir($title[$a]) or die("Cannot create page directory: $title[$a]\n");
	
	#echo "trying to change to $title[$a]\n";
	chdir($title[$a]) or die("Cannot change to page directory: $title[$a]\n");
	
	#echo "current dir: " . getcwd() . "\n";
	
	# Write out needed files & directories
	$file = fopen("current", "w") or die("Cannot create 'current' file for page: $title[$a]\n");
	fputs($file, "00000001");
	fclose($file);
	mkdir("revisions") or die("Cannot create 'revisions' directory for page: $title[$a]\n");
	chdir("revisions") or die("Cannot change to 'revisions' directory for page: $title[$a]\n");
	#echo "current dir: " . getcwd() . "\n";

	$file = fopen("00000001", "w") or die("Cannot create revision file for page: $title[$a]\n");
	$file_text = explode("\n", $text[$a]);
	
	
	# Copy text from old to new, fixing syntax as we go 
	# - also pass in title for copying attachments and input & output directory to copy them
	$file_text = change_syntax($file_text, $title[$a], $inputDir, $outputDir);
	
	# Create output file: one line per array element, trailing whitespace removed
	foreach ($file_text as $line)
	{
		fputs($file, rtrim($line) . "\n");
	}
	unset($file_text);
	fclose($file);
	
	chdir("..") or die("Cannot leave 'revisions' directory of page: $title[$a]\n");
	#echo "current dir: " . getcwd() . "\n";

	# chown & chmod to set correct permissions (this assumes we're running script as root)
	system("chown -R apache:apache .");
	system("chmod -R g+w .");
	system("chmod -R o-rx .");

	chdir("..") or die("Cannot return to 'pages' directory\n");
}
chdir("..") or die("Cannot return to output directory\n");

# Turn a page name into the name used for its MoinMoin page directory.
# - every space and every "+" is replaced by "_"
# - the result is converted from ISO-8859-1 to UTF-8 exactly once
#   (encoding twice would turn e.g. "é" into the four bytes of "Ã©")
# Returns the converted name.
function fix_title($title)
{
	$title = str_replace(array(" ", "+"), "_", $title);

	# utf8_encode() is deprecated in recent PHP versions; prefer the mbstring
	# equivalent when that extension is loaded, otherwise fall back to it.
	if (function_exists('mb_convert_encoding'))
	{
		return mb_convert_encoding($title, 'UTF-8', 'ISO-8859-1');
	}
	return utf8_encode($title);
}

# Convert the lines of one vqWiki page to MoinMoin markup.
#
# Parameters:
#   $array     - the page text, one array element per line (no trailing newlines)
#   $pageTitle - name of the page directory; used to locate its attachments directory
#   $inputDir  - vqWiki data directory; attachments are read from $inputDir/upload/jsp
#   $outputDir - MoinMoin data directory; attachments are copied to
#                $outputDir/pages/$pageTitle/attachments
#
# Returns the array with each line converted. A converted line may itself contain
# an embedded "\n" (after a "{{{#!java" / "{{{#!html" opener, or for "@@" inside a list).
#
# Lines inside multi-line blocks (preformatted text, {{{ }}} code, java and html
# blocks) are passed through untouched. All other lines go through the
# substitutions below, whose order matters.
#
# Side effect: for every "attach:name" reference found, the attachment file is
# copied into the page's attachments directory and a progress line is echoed.
function change_syntax ($array, $pageTitle, $inputDir, $outputDir) 
{
	# initialize - which kind of multi-line block (if any) the current line is inside
	$in_preformatted_text = 0;
	$in_multiple_line_code = 0;	
	$in_multiple_line_java_code = 0;
	$in_multiple_line_html_code = 0;

	# patterns
	$java_start_tag_pattern = "/\[<java>\]/";
	$java_end_tag_pattern = "/\[<\/java>\]/";
	$html_start_tag_pattern = "/\[<html>\]/";
	$html_end_tag_pattern = "/\[<\/html>\]/";
	
	# the number of lines never changes inside the loop, so count once
	$lineCount = count($array);

	for ($a = 0; $a < $lineCount; $a++) 
	{
		# assign row as a reference to current array item
		$row =& $array[$a];
		
		# Handle multiple-line preformatted text
		if ($in_preformatted_text) 
		{
			# found the end? (a blank line closes the block)
			if (preg_match("/^\s*$/", $row)) 
			{
				$row = "}}}";
				$in_preformatted_text = 0;
			}
			else 
			{
				# do nothing - skip all other substitutions
				continue;		
			}
		}

		# Handle multiple-line code
		elseif ($in_multiple_line_code)
		{
			# found the end?
			if (preg_match("/}}}/", $row)) 
			{
				$in_multiple_line_code = 0;
			}
			else 
			{
				# do nothing - skip all other substitutions
				continue;		
			}
		}

		# Handle multiple-line java code
		elseif ($in_multiple_line_java_code)
		{
			# found the end?
			if (preg_match($java_end_tag_pattern, $row)) 
			{
				$row  = preg_replace($java_end_tag_pattern, "}}}", $row);
				$in_multiple_line_java_code = 0;
			}
			else 
			{
				# do nothing - skip all other substitutions
				continue;		
			}			
		}

		# Handle multiple-line html code
		elseif ($in_multiple_line_html_code)
		{
			# found the end?
			if (preg_match($html_end_tag_pattern, $row)) 
			{
				$row  = preg_replace($html_end_tag_pattern, "}}}", $row);
				$in_multiple_line_html_code = 0;
			}
			else 
			{
				# do nothing - skip all other substitutions
				continue;		
			}			
		}

		# Not in multiple-line preformatted text or multiple-line code block
		else 
		{
			# Preformatted text - @@@@ on line by self, but ending on another line where it's all blank
			if (preg_match("/^@@@@\s*$/", $row)) 
			{
				$row  = preg_replace("/^\s*@@@@\s*$/", "{{{", $row);	
				$in_preformatted_text = 1;	

				# Don't do any more processing on this line
				continue;
			}
			
			# Code - {{{ xxx }}} - may be on same or different lines
			if (preg_match("/{{{/", $row)) 
			{
				# if we don't find the closing braces, we are in a multiple-line code situation
				if (!preg_match("/}}}/", $row)) 
				{
					$in_multiple_line_code = 1;	
				}

				# Don't do any more processing on this line
				continue;
			}
			
			# Java Code - [<java>] xxx [</java>] - may be on same or different lines
			if (preg_match($java_start_tag_pattern, $row)) 
			{
				$row  = preg_replace($java_start_tag_pattern, "{{{#!java", $row);

				# if we don't find the closing tag, we are in a multiple-line java code situation
				if (!preg_match($java_end_tag_pattern, $row)) 
				{
					$in_multiple_line_java_code = 1;	
				}
				# otherwise, replace end tag
				else
				{
					$row  = preg_replace($java_end_tag_pattern, "}}}", $row);
				}

				# also add line break after open tag
				$row  = preg_replace("/{{{#!java/", "{{{#!java\n",  $row);

				# Don't do any more processing on this line
				continue;
			}
			
			# HTML Code - [<html>] xxx [</html>] - may be on same or different lines
			if (preg_match($html_start_tag_pattern, $row)) 
			{
				#echo "in html\n";
				$row  = preg_replace($html_start_tag_pattern, "{{{#!html", $row);

				# if we don't find the closing tag, we are in a multiple-line html code situation
				if (!preg_match($html_end_tag_pattern, $row)) 
				{
					$in_multiple_line_html_code = 1;	
				}
				# otherwise, replace end tag
				else
				{
					$row  = preg_replace($html_end_tag_pattern, "}}}", $row);

				}
				# also add line break after open tag
				$row  = preg_replace("/{{{#!html/", "{{{#!html\n",  $row);

				#echo "$row\n";

				# Don't do any more processing on this line
				continue;
			}

			# Tables
			$row  = preg_replace("/####/", "", $row );                  # don't need these
			$row  = preg_replace("/^([^#]+)##/", "||$1||", $row, 1);    # add 1st column start marker
			$row  = preg_replace("/##/", "||", $row );                  # all other markers
			
			# Backtick links: `link` => ["link"] - must come before 'No formatting code'
			$row  = preg_replace("/`([^`]+)`/", "[\"$1\"]", $row);     	
			
			# C2 links
			$row  = preg_replace("/c2:/", "wiki:Wiki:", $row);     	
			
			# No formatting code (__) - must come before underline conversion step
			$row  = preg_replace("/__([^_]+)__/", "`$1`", $row);    
			
			# Underline: ===text=== => __text__ (must come before headings)
			$row  = preg_replace("/===([^=]+)===/", "__$1__", $row);    # underline
			
			# Headings
			$row  = preg_replace("/!!!([^!]+)!!!/", "= $1 =", $row);    # heading level 1
			$row  = preg_replace("/!!([^!]+)!!/", "== $1 ==", $row);    # heading level 2
			$row  = preg_replace("/!([^!]+)!/", "=== $1 ===", $row);    # heading level 3
			
			# Bulleted Lists: (there may be a better way to do this)
			$row  = preg_replace("/^\t\*/", " * ", $row);     			# bullet indented 1
			$row  = preg_replace("/^\t\t\*/", "   * ", $row);     			# bullet indented 2
			$row  = preg_replace("/^\t\t\t\*/", "     * ", $row);     			# bullet indented 3
	
			# Numbered Lists: # => 1.  (note: there may be a better way to do this)
			$row  = preg_replace("/^\t\#/", " 1. ", $row);     			# item indented 1
			$row  = preg_replace("/^\t\t\#/", "   1. ", $row);     			# item indented 2
			$row  = preg_replace("/^\t\t\t\#/", "     1. ", $row);     			# item indented 3
			
			# Line breaks inside lists
#			echo "$row\n";
			if (preg_match("/^(\s*)(1\.|\*)(.*)@@/", $row, $matches)) 
			{
				$leadingSpaces = $matches[1];
				#echo "spaces: ->$leadingSpaces<- \n";
				#echo "before: $row\n";
				# continue the item on a new line that starts with the item's own leading whitespace
				$row  = preg_replace("/@@/", "\n$leadingSpaces", $row);
				#echo "after : $row\n";
			}
			
			# Other line breaks - appearing anywhere else
			$row  = preg_replace("/@@/", " [[BR]] ", $row);
			
			# Attachments: attach: -> attachment:   - Also copy attachments to new wiki
			# does not handle attachments in this format:  attach:"File name with spaces" -- fix those manually
			
			$attachmentPattern = "/attach:([\w.-]+)/"; # this is not a complete filename regex, but works for me!!!
			
			# Collect every reference on the line: the syntax fix below rewrites all of
			# them, so all of the referenced files have to be copied as well.
			if (preg_match_all($attachmentPattern, $row, $attachmentMatches))
			{
				# Fix syntax
				$row  = preg_replace($attachmentPattern, "attachment:$1", $row);
				
				$newDirectory = "$outputDir/pages/$pageTitle/attachments"; 
				$haveDirectory = is_dir($newDirectory);
				if (!$haveDirectory)
				{
					#echo "making new attachments directory: $newDirectory\n";
					$haveDirectory = mkdir($newDirectory);
					if (!$haveDirectory)
					{
						echo "\tWARNING: could not create attachments directory: $newDirectory\n";
					}
				}

				# Copy file attachments (a file referenced twice on the line is copied once)
				foreach (array_unique($attachmentMatches[1]) as $attachmentFilename)
				{
					$existingLocation = "$inputDir/upload/jsp/$attachmentFilename";
					#echo "existing location: $existingLocation\n";
					$newLocation = "$newDirectory/$attachmentFilename";
					#echo "new location: $newLocation\n";

					#echo "current dir: " . getcwd() . "\n";
					echo "\tattachment...$attachmentFilename\n";

					if (!$haveDirectory)
					{
						continue;
					}
					if (!is_file($existingLocation))
					{
						echo "\tWARNING: attachment not found: $existingLocation\n";
						continue;
					}
					if (!copy($existingLocation, $newLocation))
					{
						echo "\tWARNING: could not copy attachment to: $newLocation\n";
					}
				}
			}
			
	
			# Horizontal rules - no conversion necessary
			
			# Bold/italic - no conversion necessary


			# Handle line break issue
			# Look at next line (still unconverted at this point)
			if ($a+1 < $lineCount) # only proceed if there are more lines
			{  
				
				$nextRow = $array[$a+1];
				$emptyRowPattern = "/^\s*$/";
				
				# figure out if we should add a line break - only if all of these conditions are met
				if (!preg_match($emptyRowPattern, $row)               # current row is not empty
					&& !preg_match("/----/", $row)               	  # current row does not have horizontal rule
					&& !preg_match("/=+[^=]+=+/", $row)               # current row is not a heading
					&& !preg_match($emptyRowPattern, $nextRow)        # next row is not empty
					&& !preg_match("/^\t+[\*\#]/", $nextRow)          # next row doesn't start with bullet or numbered item
					&& !preg_match("/##/", $nextRow)           		  # next row doesn't contain table markup
				   )    
				{
					# only if all above conditions are met do we add a break
					$row .= " [[BR]]"; # include space before to prevent "Java:[[BR]] making an Interwiki link, among other things
				}
			}
		}		

	}

	# break the reference so the returned array holds plain values only
	unset($row);

	return $array;
}


# Code to fix titles that I did not need
/*	$quoted = array();
	$in_parenthesis = false;
	for ($i = 0; $i < strlen($title[$a]); $i++) 
	{
		$curchar = substr ($title[$a], $i, 1);
		if (ereg('[^a-zA-Z0-9_]', $curchar)) 
		{
			if (!$in_parenthesis) 
			{
				$quoted[] = '(';
				$in_parenthesis = true;
			}
			$quoted[] = str_pad(dechex(ord($curchar)), 2, '0', STR_PAD_LEFT);
		} 
		else 
		{
			if ($in_parenthesis) 
			{
				$quoted[] = ')';
				$in_parenthesis = false;
			}
			$quoted[] = $curchar;
		}
	}
	if ($in_parenthesis)
	{
		$quoted[] = ')';
	}
	$title[$a] = implode('', $quoted);
	unset($quoted);
*/
?>

