#!/usr/bin/env python

"""
Copyright (c) 2005 Guy K. Kloss <guy.kloss@dlr.de>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 See also http://www.fsf.org

------------------------------------------------------------------------
PmWikiMoinMoinConverter

Inspired by the PhpWikiMoinMoinConverter by "The Anarcat".

= Usage =

Call this script directly from the wiki directory. You must have write
access to ./data and subdirectories to create pages. You will need to
change the CONFIGURATION DIRECTIVES, below.

= Special Considerations =

This script will happily destroy existing wikis if it feels like it,
so backups are of course advised before performing a conversion. Note
that this script will just create the new pages, and overwrite any 
existing pages, so you better backup or die.

= Limitations =

This script is also crucially incomplete, and most definitely lacks
several PmWiki and MoinMoin features. Some are due to limitations of
the wiki, others are due to the inherent ambiguity of the wiki syntax,
and still others are just due to the fact that converters have not been
implemented, yet.

Additionally, this script has been created for the sake of converting a
wiki site running PmWiki 0.5.27, current versions are around 2.0.x ...

A summary of some limitations:

 * linking
   * not all link styles are supported (e. g. "free links" with suppressions)
   * "free links" may be flaky due to name conversion changes in the two systems
   * wiki pages containing "special characters" (e. g. "-") will need to be
     renamed manually (in the file system)
   * link anchors are not implemented
   * WikiWords preceded by a number, underscore, etc. will still be
     interpreted as a WikiWord (though without the "prefix")
   * on PmWiki's "HomePage", wiki links do not point relative to the
     WikiGroup's corresponding pages, as PmWiki's "HomePage" is a sub
     page of the WikiGroup, whereas MoinMoin can attach content to the
     group itself
   * simple WikiWords that produce links in PmWiki may not produce wiki
     links in MoinMoin (words with double caps, numbers, or words ending
     in caps)
 * wiki page includes
 * table syntax is messy, and will not be converted, but warned about
 * quite some macros might not work
 * in case of existing wiki pages, it does not edit the content, but it
   actually replaces the content with ID 1 and sets the ID to 1
 * no automatic migration of attachments and images
 * fancy formatting in term/definition lists will break the list
 * MoinMoin doesn't support wiki syntax e. g. in head lines

= Resources =

 * MoinMoin Wiki syntax: http://moinmoin.wikiwikiweb.de/HelpOnEditing
 * PmWiki syntax from setup site in PmWiki setup: PmWiki.DocumentationIndex
"""

# CONFIGURATION DIRECTIVES
#
# The path to the MoinMoin wiki. The converter derives the page directory
# from it as <moinWikiPath>/wiki/data/pages. Leave empty if you know what
# you're doing.
moinWikiPath = '/tmp/moin-desktop'

# The path to the PmWiki directory. The list of pages to convert is read
# from its "wiki.d" subdirectory.
pmWikiPath = '/tmp/pmwiki'

# PmWiki server: host name and the path of pmwiki.php on that host. The
# source of a page is requested as <pmWikiBase>/<WikiGroup>/<WikiWord>?action=source
pmWikiHost = 'thor.sistec.kp.dlr.de'
pmWikiBase = '/wiki/pmwiki.php'

# Text written as the first line of every created page (e. g. an "#acl ..."
# processing instruction). Empty means that just an empty first line is written.
moinAcl = ''# '#acl VkAdminGroup:read,write,delete,revert,admin VkUserGroup:read,write,delete,revert All:'

# Whether pages that already exist are written again.
#
# If False, a page whose directory already exists is skipped. If True, the
# existing page is overwritten: the content is always stored as revision
# 00000001 and the page's current revision is set to 00000001, no new
# revision is created. So make a backup first.
editExistingPages = True

# 
# END OF CONFIGURATION DIRECTIVES

import re
import os
import os.path
import sys
import httplib

class PmConverter:
    """
    Converter for PmWiki pages into MoinMoin pages.

    The wiki markup is translated by applying the regular expression
    substitutions listed in "converters" to the complete page text; the
    patterns listed in "warnings" only produce messages for the user.
    """
    # Warning list.
    # Each entry is [regular expression, message]. If the expression is found
    # in the source of a page, the message is reported after the conversion,
    # so the user knows which things have to be fixed manually.
    warnings = [['\|\|', # tables: would be a conversion mess
                 'Tables in PmWiki, look at http://moinmoin.wikiwikiweb.de/HelpOnTables'],
                ['\[\[table', # advanced tables, more messy
                 'Advanced tables in PmWiki, look at http://moinmoin.wikiwikiweb.de/HelpOnTables'],
                ['{{(.+?)}}',
                 '"Free links" may have different names on the system.'],
                ['HomePage',
                 'No checks for HomePage -> FrontPage conversion.']
                ]
    
    # Markup conversion list.
    # (Note: The order within this list, and thus the conversion order,
    # may be critical on some conversions, e. g. on head lines.).
    #
    # Every entry is a dictionary with two keys:
    #   'lable'     - short identifier of the conversion (the key really is
    #                 spelled "lable" everywhere in this file)
    #   'converter' - list of steps, each a dictionary with the regular
    #                 expression 'search' and the substitution 'replace'
    #
    # The steps are not executed entry by entry: all first steps are applied
    # in a first pass (in list order), then all second steps in a second pass.
    # First steps mostly wrap the matched text into a temporary marker of the
    # form "%%xx<...>xx%%", which is turned into MoinMoin markup by a later
    # step. A first step with an empty 'search' and 'replace' does not change
    # the text, it only moves the real work of that entry into the second pass.
    converters = [### --- block conversions --------------------------------
                  {'lable': 'vb', # verbatim block
                   'converter':
                   [{'search': '^ +\[=(.*?)=\]',
                     'replace': r'%%vb<\1>vb%%'}]},
                  {'lable': 'nb', # remove line break preventions with new line
                   'converter':
                   [{'search': r'\\$\n',
                     'replace': r''}]},
                  ### --- complete line conversions ------------------------
                  {'lable': 'vl', # verbatim line
                   'converter':
                   [{'search': '^ (.*?)$',
                     'replace': r'%%vl<\1>vl%%'},
                    {'search': '%%vl<(.*?)>vl%%',
                     'replace': r'`\1`'}]},
                  {'lable': 'h4', # heading 4
                   'converter':
                   [{'search': '^!!!! *?(.*?)$',
                     'replace': r'%%h4<\1>h4%%'},
                    {'search': '%%h4<(.*?)>h4%%',
                     'replace': r'==== \1 ===='}]},
                  {'lable': 'h3', # heading 3
                   'converter':
                   [{'search': '^!!! *?(.*?)$',
                     'replace': r'%%h3<\1>h3%%'},
                    {'search': '%%h3<(.*?)>h3%%',
                     'replace': r'=== \1 ==='}]},
                  {'lable': 'h2', # heading 2
                   'converter':
                   [{'search': '^!! *?(.*?)$',
                     'replace': r'%%h2<\1>h2%%'},
                    {'search': '%%h2<(.*?)>h2%%',
                     'replace': r'== \1 =='}]},
                  {'lable': 'h1', # heading 1
                   'converter':
                   [{'search': '^! *?(.*?)$',
                     'replace': r'%%h1<\1>h1%%'},
                    {'search': '%%h1<(.*?)>h1%%',
                     'replace': r'= \1 ='}]},
                  {'lable': 'b4', # bullet 4
                   'converter':
                   [{'search': '^\*\*\*\* *?(.*?)$',
                     'replace': r'%%b4<\1>b4%%'},
                    {'search': '%%b4<(.*?)>b4%%',
                     'replace': r'    * \1'}]},
                  {'lable': 'b3', # bullet 3
                   'converter':
                   [{'search': '^\*\*\* *?(.*?)$',
                     'replace': r'%%b3<\1>b3%%'},
                    {'search': '%%b3<(.*?)>b3%%',
                     'replace': r'   * \1'}]},
                  {'lable': 'b2', # bullet 2
                   'converter':
                   [{'search': '^\*\* *?(.*?)$',
                     'replace': r'%%b2<\1>b2%%'},
                    {'search': '%%b2<(.*?)>b2%%',
                     'replace': r'  * \1'}]},
                  {'lable': 'b1', # bullet 1
                   'converter':
                   [{'search': '^\* *?(.*?)$',
                     'replace': r'%%b1<\1>b1%%'},
                    {'search': '%%b1<(.*?)>b1%%',
                     'replace': r' * \1'}]},
                  {'lable': 'n4', # numbering 4
                   'converter':
                   [{'search': '^#### *?(.*?)$',
                     'replace': r'%%n4<\1>n4%%'},
                    {'search': '%%n4<(.*?)>n4%%',
                     'replace': r'    1. \1'}]},
                  {'lable': 'n3', # numbering 3
                   'converter':
                   [{'search': '^### *?(.*?)$',
                     'replace': r'%%n3<\1>n3%%'},
                    {'search': '%%n3<(.*?)>n3%%',
                     'replace': r'   1. \1'}]},
                  {'lable': 'n2', # numbering 2
                   'converter':
                   [{'search': '^## *?(.*?)$',
                     'replace': r'%%n2<\1>n2%%'},
                    {'search': '%%n2<(.*?)>n2%%',
                     'replace': r'  1. \1'}]},
                  {'lable': 'n1', # numbering 1
                   'converter':
                   [{'search': '^# *?(.*?)$',
                     'replace': r'%%n1<\1>n1%%'},
                    {'search': '%%n1<(.*?)>n1%%',
                     'replace': r' 1. \1'}]},
                  {'lable': 'i4', # indention 4
                   'converter':
                   [{'search': '^:::: :(.*?)$',
                     'replace': r'%%i4<\1>i4%%'},
                    {'search': '%%i4<(.*?)>i4%%',
                     'replace': r'       \1'}]},
                  {'lable': 'i3', # indention 3
                   'converter':
                   [{'search': '^::: :(.*?)$',
                     'replace': r'%%i3<\1>i3%%'},
                    {'search': '%%i3<(.*?)>i3%%',
                     'replace': r'     \1'}]},
                  {'lable': 'i2', # indention 2
                   'converter':
                   [{'search': '^:: :(.*?)$',
                     'replace': r'%%i2<\1>i2%%'},
                    {'search': '%%i2<(.*?)>i2%%',
                     'replace': r'   \1'}]},
                  {'lable': 'i1', # indention 1
                   'converter':
                   [{'search': '^: :(.*?)$',
                     'replace': r'%%i1<\1>i1%%'},
                    {'search': '%%i1<(.*?)>i1%%',
                     'replace': r' \1'}]},
                  {'lable': 'td', # term/definition
                   'converter':
                   [{'search': '^::([^: ]+[^:]*?):(.*?)$',
                     'replace': r'%%td< \1::\2>td%%'},
                    {'search': '%%td<(.*?)>td%%',
                     'replace': r'\1'}]},
                  {'lable': 'hr', # horizontal rule
                   'converter':
                   [{'search': '^-----*?(.*?)',
                     'replace': r'----\1'}]},

                  ### --- in-line conversions ------------------------------
                  ## --- formatting ---
                  {'lable': 'br', # line break
                   'converter':
                   [{'search': '\[\[<<\]\]',
                     'replace': r'%%ma<BR>ma%%'}]},
                  {'lable': 'cd', # monospaced, no wiki highlighting
                   'converter':
                   [{'search': '@@\[=(.*?)=\]@@',
                     'replace': r'%%cd<\1>cd%%'},
                    {'search': '%%cd<(.*?)>cd%%',
                     'replace': r'{{{\1}}}'}]},
                  {'lable': 'tt', # monospaced (also no wiki highlighting)
                   'converter':
                   [{'search': '@@(.*?)@@',
                     'replace': r'%%tt<\1>tt%%'},
                    {'search': '%%tt<(.*?)>tt%%',
                     'replace': r'`\1`'}]},
                  {'lable': 'nh', # don't know how to suppress wiki syntax interpretation any better, not nice, though ...
                   'converter':
                   [{'search': '\[=(.*?)=\]',
                     'replace': r'%%nh<\1>nh%%'},
                    {'search': '%%nh<(.*?)>nh%%',
                     'replace': r'`\1`'}]},
                  # bold is equivalent "'''" (the same)
                  # italics is equivalent "''" (the same)
                  # underline: not present in PmWiki
                  {'lable': 'fl', # larger
                   'converter':
                   [{'search': '\[\+(.*?)\+\]',
                     'replace': r'%%fl<\1>fl%%'},
                    {'search': '%%fl<(.*?)>fl%%',
                     'replace': r'~+\1+~'}]},
                  {'lable': 'fs', # smaller
                   'converter':
                   [{'search': '\[-(.*?)-\]',
                     'replace': r'%%fs<\1>fs%%'},
                    {'search': '%%fs<(.*?)>fs%%',
                     'replace': r'~-\1-~'}]},
                  {'lable': 'su', # superscript
                   'converter':
                   [{'search': '\^\^(.*?)\^\^',
                     'replace': r'%%su<\1>su%%'},
                    {'search': '%%su<(.*?)>su%%',
                     'replace': r'^\1^'}]},
                  {'lable': 'sb', # subscript
                   'converter':
                   [{'search': '__(.*?)__',
                     'replace': r'%%sb<\1>sb%%'},
                    {'search': '%%sb<(.*?)>sb%%',
                     'replace': r',,\1,,'}]},

                  ## --- linking ---
                  # WikiWord is the same, a good regex for them in PmWiki: '(([A-Z][a-z0-9]*){2,})'
                  {'lable': 'ld1', # [[WikiGroup/WikiWord descriptive text]] or [[WikiWord descriptive text]]
                   'converter':
                   [{'search': '\[\[(?P<link>([A-Za-z0-9]*)(/[A-Za-z0-9]*)?) (?P<desc>.+?)\]\]',
                     'replace': r'%%ld<:../\g<link>:\g<desc>>ld%%'}]},
                  {'lable': 'ld2', # [[WikiGroup.WikiWord descriptive text]]
                   'converter':
                   [{'search': '\[\[(?P<group>[A-Za-z0-9]*)\.(?P<word>[A-Za-z0-9]*) (?P<desc>.+?)\]\]',
                     'replace': r'%%ld<:../\g<group>/\g<word>:\g<desc>>ld%%'}]},
                  {'lable': 'lf1', # free links w/ WikiGroups
                   'converter':
                   [{'search': '(?P<group>([A-Z][a-z0-9]*){2,})[./]{{(?P<word>.+?)}}',
                     'replace': r'%%lf</\g<group>/\g<word>>lf%%'}]},
                  {'lable': 'lf2', # free links
                   'converter':
                   [{'search': '([^{./]){{(.+?)}}([^}])',
                     'replace': r'\1%%lf<../\2>lf%%\3'}]},
                  # URL includes (images, too) work out of the box
                  {'lable': 'lu', # URL w/ alternative link text
                   'converter':
                   [{'search': '\[\[((http|ftp|mailto)\S+) (.+?)\]\]',
                     'replace': r'%%ld<\1 \3>ld%%'}]},
                  {'lable': 'afd', # attached file w/ alternative link text
                   'converter':
                   [{'search': '\[\[Attach:(\S+?) (.+?)\]\]',
                     'replace': r'%%ld<attachment:../\1 \2>ld%%'}]},
                  {'lable': 'ai', # inline attached images
                   'converter':
                   [{'search': 'Attach:(\S+?\.(jpg|jpeg|png|gif)) ',
                     'replace': r'inline:../\1 '}]},
                  {'lable': 'af', # attached files
                   'converter':
                   [{'search': 'Attach:(\S+?) ',
                     'replace': r'attachment:../\1 '}]},
                  {'lable': 'hp', # HomePage (wild removal without checks for any exceptions)
                   'converter':
                   [{'search': '[./]HomePage',
                     'replace': r''}]},
                  {'lable': 'ls', # simple WikiWord link
                   'converter':
                   [{'search': '([^/\w.:<["])([A-Z][a-z]([A-Z][a-z0-9]*)+)',
                     'replace': r'\1%%li<../\2>li%%'}]},
                  ## --- macros ---
                  {'lable': 'ma', # list of attachments for current page
                   'converter':
                   [{'search': '\[\[\$AttachList\]\]',
                     'replace': r'%%ma<AttachList>ma%%'}]},
                  {'lable': 'ms', # search field
                   'converter':
                   [{'search': '\[\[\$Search\]\]',
                     'replace': r'%%ma<FullSearch>ma%%'}]},

                  ## --- cleanups, fixes, and finalizations ---
                  # possible link correction for certain wiki links
                  {'lable': 'x1', # WikiWord ending in cap (e. g. "LaTeX")
                   'converter':
                   [{'search': '%%li<(.+?[A-Z])>li%%',
                     'replace': r'%%lf<\1>lf%%'}]},
                  {'lable': 'x2', # WikiGroup ending in cap (e. g. "LaTeX")
                   'converter':
                   [{'search': '%%li<(.+?[A-Z]/.+?)>li%%',
                     'replace': r'%%lf<\1>lf%%'}]},
                  {'lable': 'x3', # WikiWord w/ double caps may not link (e. g. "WikiWWord")
                   'converter':
                   [{'search': '%%li<(.*?[A-Z]{2,}.*?)>li%%',
                     'replace': r'%%lf<\1>lf%%'}]},
                  {'lable': 'x4', # WikiWord w/ numbers may not link (e. g. "Wiki2wiki")
                   'converter':
                   [{'search': '%%li<(.*?[0-9].*?)>li%%',
                     'replace': r'%%lf<\1>lf%%'}]},

                  # other finalizations
                  {'lable': 'ym', # put double brackets around macros
                   'converter':
                   [{'search': '', # intentionally blank, should trigger in second pass
                     'replace': r''},
                    {'search': '%%ma<(.*?)>ma%%',
                     'replace': r'[[\1]]'}]},
                  {'lable': 'yv', # put "{{{" and "}}}" around verbatim blocks
                   'converter':
                   [{'search': '%%vb<(.*?)>vb%%',
                     'replace': r'{{{\1}}}'}]},

                  # link finalization
                  {'lable': 'z0', # Wiki link change "." to "/"
                   'converter':
                   [{'search': '%%li<(.*?)\.(.*?)>li%%',
                     'replace': r'%%li<\1/\2>li%%'}]},
                  {'lable': 'z1', # Wiki link finalization
                   'converter':
                   [{'search': '', # intentionally blank, should trigger in second pass
                     'replace': r''},
                    {'search': '%%li<(.*?)>li%%',
                     'replace': r'\1'}]},
                  {'lable': 'z2', # Wiki free link finalization
                   'converter':
                   [{'search': '', # intentionally blank, should trigger in second pass
                     'replace': r''},
                    {'search': '%%lf<(.*?)>lf%%',
                     'replace': r'["\1"]'}]},
                  {'lable': 'z3', # Wiki link w/ description finalization
                   'converter':
                   [{'search': '', # intentionally blank, should trigger in second pass
                     'replace': r''},
                    {'search': '%%ld<(.*?)>ld%%',
                     'replace': r'[\1]'}]},
                  {'lable': 'z4', # WikiGroup.WikiWord (brutally do this "Word.Word" -> "["/Word/Word"]")
                   'converter':
                   [{'search': '', # intentionally blank, should trigger in second pass
                     'replace': r''},
                    {'search': '([A-Z][a-zA-Z0-9]*)\.([A-Z][a-zA-Z0-9]*)',
                     'replace': r'["/\1/\2"]'}]}
                  ]
    
    # Fix for straying wiki syntax within a verbatim block.
    # The conversions also hit text inside of verbatim blocks and leave their
    # temporary markers there. These two patterns are used to clean that up:
    #   'find' - a complete verbatim block marker "%%vb<...>vb%%" that starts
    #            at the beginning of a line
    #   'kill' - any opening ("%%xx<") or closing (">xx%%") temporary marker,
    #            where "xx" is a lower case letter followed by a lower case
    #            letter or a digit
    verbatimBlockFix = {'find': re.compile('^%%vb<(.*?)>vb%%', re.MULTILINE | re.DOTALL),
                        'kill': re.compile('%%[a-z][a-z0-9]<|>[a-z][a-z0-9]%%', re.MULTILINE | re.DOTALL)}


    def __init__(self):
        """
        Prepares the converter: compiles all regular expressions and sorts
        the conversion steps into passes.

        After the call self.actions is a list of passes. Pass number i holds
        the i-th step of every converter that has such a step, in the order
        of the converter list. Each step is a dictionary with the keys
        'search' (compiled regular expression), 'replace' and 'lable'.

        The class level tables "converters" and "warnings" are only read.
        The compiled warning patterns are stored in an instance attribute of
        the same name. (Compiling in place, as done before, made the creation
        of a second instance fail, because an already compiled pattern can
        not be compiled again with flags.)
        """
        self.warningMessages = []
        self.moinWikiPages = os.path.join(moinWikiPath, 'wiki', 'data', 'pages')

        regexFlags = re.MULTILINE | re.DOTALL

        # Generate action list and compile regular expressions:
        self.actions = []
        for converter in self.converters:
            for i, item in enumerate(converter['converter']):
                # Work on a copy, the shared class level table stays as it is.
                action = {}
                action.update(item)
                action['search'] = re.compile(item['search'], regexFlags)
                action['lable'] = converter['lable']
                if i >= len(self.actions):
                    self.actions.append([])
                self.actions[i].append(action)

        # The right hand side still reads the class level table, the
        # assignment then creates the instance attribute hiding it.
        self.warnings = [[re.compile(item[0], regexFlags), item[1]]
                         for item in self.warnings]

        # Some variables to ease the processing:
        self.currentPage = []
        self.currentPageName = ''


    def convertPages(self):
        """
        The whole conversion magic.
        """
        pages = self.getWikiPages()

        print 'Processing ...'
        for page in pages:
            print ' ', '/'.join(page)

            self.currentPage = []
            self.currentPage.extend(page)
            if self.currentPage[1] == 'HomePage':
                self.currentPage.pop(1)
                self.warningMessages.append(self.currentPage
                                            + ['Wiki links to and from converted WikiGroup.HomePage might be broken.'])
            self.currentPageName = '(2f)'.join(self.currentPage)
            
            # Do the actual parsing of this page and save it.
            text = self.blockParser(self.getSource(page))

            # Now put the converted page into MoinMoin.
            self.makePage(text)

        print '\n\nWarnings: '
        self.warningMessages.sort()
        for item in  self.warningMessages:
            print '%s: %s' % (item[0], item[1])
        

    def getWikiPages(self):
        """
        Retrieves a list of all still relevant pages, each in the form
        [WikiGroup, WikiWord].

        The list is built from the file names in the "wiki.d" directory below
        pmWikiPath. Skipped are:
         * names containing a comma (presumably old or deleted page versions,
           to be confirmed against the PmWiki version in use)
         * names that do not consist of exactly two fragments separated by a
           dot; for these a warning is recorded, as they can not be mapped to
           a WikiGroup and a WikiWord
         * administrative files and pages (lock files, change lists, ...)
        """
        ignoredNames = ('htaccess', 'flock', 'mailposts',
                        'RecentChanges', 'RecentUploads',
                        'AllRecentChanges', 'AllRecentUploads',
                        'SearchWiki', 'WebMenu')
        relevantPages = []

        for fileName in os.listdir(os.path.join(pmWikiPath, 'wiki.d')):
            if ',' in fileName:
                continue

            fragments = fileName.split('.')
            if len(fragments) != 2:
                # Without this check a name without a dot raised an
                # IndexError right here, and a name with several dots broke
                # the retrieval of the page source later on.
                self.warningMessages.append([fileName,
                                             'Skipped, file name is not of the form WikiGroup.WikiWord.'])
                continue

            # '.htaccess' and '.flock' end up here as ['', 'htaccess'] and
            # ['', 'flock'], so they are caught by the ignore list as well.
            if fragments[1] not in ignoredNames:
                relevantPages.append(fragments)

        return relevantPages

    
    def getSource(self, page):
        """
        Uses a HTTP request to the installed PmWiki to retrieve the
        wiki source of the individual page.

        page is the pair [WikiGroup, WikiWord]. The request goes to the
        configured server pmWikiHost and asks for
        <pmWikiBase>/<WikiGroup>/<WikiWord>?action=source

        Returns the body of the response with leading and trailing white
        space removed. If the server does not answer with status 200, a
        warning is recorded and the HTTP error is printed, but the body of
        the response is returned nevertheless.
        """
        wikiGroup, wikiPage = page
        url = '/'.join((pmWikiBase, wikiGroup, wikiPage)) + '?action=source'
        # Use the configured host instead of a host name fixed in the code.
        connection = httplib.HTTPConnection(pmWikiHost)
        try:
            connection.request('GET', url)
            response = connection.getresponse()
            if response.status != 200:
                # Warnings are [page name, message], that is the form in
                # which they are printed after the conversion.
                self.warningMessages.append(['/'.join(page),
                                             'Could not retrieve the original wiki page.'])
                print 'HTTP error:', response.status, response.reason,
            source = response.read()
        finally:
            # Close the connection even if the request failed.
            connection.close()

        return source.strip()
    

    def blockParser(self, text):
        """
        The block parser deals with the whole text to be converted.
        It will call the line parser for each line in the text.
        """
        # Check for things to be aware of and warn about later on.
        for warning in self.warnings:
            if warning[0].search(text):
                self.warningMessages.append(['/'.join(self.currentPage), warning[1]])
        
        # Get some block conversion done before converting on line level.
        for conversionPass in self.actions:
            for conversion in conversionPass:
                # Repair errors in verbatim block conversion
                if conversion['lable'] == 'yv' and conversion['replace']:
                    text = self.verbatimBlockFix['find'].sub(self._removeCurlyBrackets, text)

                text = conversion['search'].sub(conversion['replace'], text)

        return text


    def _removeCurlyBrackets(self, match):
        """
        This one removes the unwanted straying "{{{" and "}}}" from verbatim blocks.
        """
        return '%%%%vb<%s>vb%%%%' % self.verbatimBlockFix['kill'].sub('', match.group())
    

    def makePage(self, text):
        currentFilePath = os.path.join(self.currentPageName, 'current')
        revisionsPath = os.path.join(self.currentPageName, 'revisions')
        revision = '00000001'
        contentFilePath = os.path.join(revisionsPath, revision)

        # Overwriting pages if selecting only some.
        if not os.path.exists(self.currentPageName) or editExistingPages:
            if not os.path.exists(self.currentPageName):
                try:
                    os.mkdir(self.currentPageName)
                    os.mkdir(revisionsPath)
                except OSError, err:
                    self.warningMessages.append('/'.join(self.currentPage)
                                                + ['Could not create a directory needed for the wiki page.'])
                    print err,

            text = unicode(text, "latin1").encode("utf8")
            try:
                # This will be the page revision ID.
                fileHandler = open(currentFilePath, 'w')
                fileHandler.write(revision)
                fileHandler.close()
                # This will be the content for that ID.
                fileHandler = open(contentFilePath, 'w')
                fileHandler.write(moinAcl + '\n' + text)
                fileHandler.close()
            except Exception, err:
                self.warningMessages.append('/'.join(self.currentPage)
                                            + ['Could not write the content of the wiki page.'])
                print err,
        else:
            print '(*** already exists, skipping ***)',

if __name__ == '__main__':
    # Run the complete conversion when called as a script, using the
    # configuration directives at the top of this file.
    PmConverter().convertPages()

    
