From 48c917b2cb9042aca63c54670b2ccdbe659e00a1 Mon Sep 17 00:00:00 2001
From: Paul Wise <pabs3@bonedaddy.net>
Date: Sat, 20 Apr 2013 13:46:50 +0800
Subject: [PATCH] Implement an incremental dump process

---
 MoinMoin/script/export/dump.py | 83 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 74 insertions(+), 9 deletions(-)

diff --git a/MoinMoin/script/export/dump.py b/MoinMoin/script/export/dump.py
index d770e5b..75d0b08 100644
--- a/MoinMoin/script/export/dump.py
+++ b/MoinMoin/script/export/dump.py
@@ -12,11 +12,16 @@ import sys, os, time, codecs, shutil, re, errno
 from MoinMoin import config, wikiutil, Page, user
 from MoinMoin import script
 from MoinMoin.action import AttachFile
+from MoinMoin.logfile import editlog
+from MoinMoin.util.filesys import touch
 
 url_prefix_static = "."
 logo_html = '<img src="logo.png">'
 HTML_SUFFIX = ".html"
 
+timestamp_text = u'''This is a MoinMoin timestamp file.
+Please delete it to rebuild all pages.
+'''  # content written to the 'moin-last-update' file in the output dir when that file does not exist yet
 page_template = u'''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
 <html>
 <head>
@@ -126,6 +131,28 @@ General syntax: moin [options] export dump [dump-options]
             help = "User the dump will be performed as (for ACL checks, etc)"
         )
 
+    def changed_pages(self, request, outputdir, timestamp):  # scan the edit log and sort its entries into (ignore, delete, update) sets
+        update = set()  # page names that have to be (re-)rendered
+        delete = set()  # (filesystem path, relative url) pairs of dumped attachment copies to remove
+        ignore = set()  # page names of skipped log entries (unreadable page or entry older than timestamp)
+        version = wikiutil.timestamp2version(timestamp)  # same unit as line.ed_time_usecs, so the two can be compared below
+        log = editlog.EditLog(request)
+        for line in log:  # every log entry is classified on its own, so a page name can end up in more than one set
+            if not request.user.may.read(line.pagename):  # ACL check against the current request.user
+                ignore.add(line.pagename)
+            elif line.ed_time_usecs < version:  # entry predates the given timestamp
+                ignore.add(line.pagename)
+            elif line.action == 'ATTDEL':  # NOTE(review): line.extra is used verbatim as the attachment file name - confirm the edit log does not store it URL-quoted
+                name = os.path.join(outputdir, "attachments", wikiutil.quoteWikinameFS(line.pagename), line.extra)
+                url = "attachments/%s/%s" % (wikiutil.quoteWikinameFS(line.pagename), line.extra)
+                delete.add((name, url))
+            elif line.action == 'SAVE/RENAME':  # presumably line.extra holds the other page name involved in the rename - TODO confirm
+                update.add(line.pagename)
+                update.add(line.extra)
+            else:  # any other action: only the page itself is queued for rendering
+                update.add(line.pagename)
+        return (ignore, delete, update)  # callers must unpack in exactly this order
+
     def mainloop(self):
         """ moin-dump's main code. """
 
@@ -157,8 +184,41 @@ General syntax: moin [options] export dump [dump-options]
         # use this user for permissions checks
         request.user = user.User(request, name=self.options.dump_user)
 
-        pages = request.rootpage.getPageList(user='') # get list of all pages in wiki
+        # Check the last update timestamp
+        timestamp = None
+        timestamp_file = os.path.join(outputdir, 'moin-last-update')
+        try:
+            timestamp = os.stat(timestamp_file).st_mtime
+        except OSError, err:
+            if err.errno == errno.ENOENT:
+                with open(timestamp_file, 'w') as f:
+                    f.write(timestamp_text)
+            else:
+                script.fatal("Cannot check last update time of '%s'!" % timestamp_file)
+
+        # Setup some helper functions
+        wikiutil.quoteWikinameURL = lambda pagename, qfn=wikiutil.quoteWikinameFS: (qfn(pagename) + HTML_SUFFIX)
+        AttachFile.getAttachUrl = lambda pagename, filename, request, **kw: _attachment(request, pagename, filename, outputdir, **kw)
+
+        # Get a list of pages
+        if timestamp:
+            touch(timestamp_file)
+            ignore, delete, render = self.changed_pages(request, outputdir, timestamp)
+            pages = list(render)
+            for (name, url) in delete:
+                try:
+                    os.remove(name)
+                except OSError, err:
+                    if err.errno != errno.ENOENT:
+                        script.fatal("Cannot remove '%s'!" % url)
+                script.log('Removed "%s"...' % url)
+            if ignore:
+                script.log('Ignored %s changes before the last export dump' % len(ignore))
+        else:
+            pages = request.rootpage.getPageList(user='') # get list of all pages in wiki
         pages.sort()
+
+        # Filter the list of pages
         if self.options.page: # did user request a particular page or group of pages?
             try:
                 namematch = re.compile(self.options.page)
@@ -168,10 +228,6 @@ General syntax: moin [options] export dump [dump-options]
             except:
                 pages = [self.options.page]
 
-        wikiutil.quoteWikinameURL = lambda pagename, qfn=wikiutil.quoteWikinameFS: (qfn(pagename) + HTML_SUFFIX)
-
-        AttachFile.getAttachUrl = lambda pagename, filename, request, **kw: _attachment(request, pagename, filename, outputdir, **kw)
-
         errfile = os.path.join(outputdir, 'error.log')
         errlog = open(errfile, 'w')
         errcnt = 0
@@ -188,11 +244,21 @@ General syntax: moin [options] export dump [dump-options]
         for pagename in pages:
             # we have the same name in URL and FS
             file = wikiutil.quoteWikinameURL(pagename)
-            script.log('Writing "%s"...' % file)
+            filepath = os.path.join(outputdir, file)
+            request.url = urlbase + pagename # add current pagename to url base
+            page = Page.Page(request, pagename)
+            if page.exists():
+                script.log('Writing "%s"...' % file)
+            else:
+                try:
+                    os.remove(filepath)
+                except OSError, err:
+                    if err.errno != errno.ENOENT:
+                        script.fatal("Cannot remove '%s'!" % file)
+                script.log('Removed "%s"...' % file)
+                continue
             try:
                 pagehtml = ''
-                request.url = urlbase + pagename # add current pagename to url base
-                page = Page.Page(request, pagename)
                 request.page = page
                 try:
                     request.reset()
@@ -206,7 +272,6 @@ General syntax: moin [options] export dump [dump-options]
                     traceback.print_exc(None, errlog)
             finally:
                 timestamp = time.strftime("%Y-%m-%d %H:%M")
-                filepath = os.path.join(outputdir, file)
                 fileout = codecs.open(filepath, 'w', config.charset)
                 fileout.write(page_template % {
                     'charset': config.charset,
-- 
1.8.2.1

