# -*- coding: iso-8859-1 -*-

import re

from MoinMoin import config
from MoinMoin import wikiutil
from emitter import DocEmitter
from document import DocNode

Dependencies = []


class Parser:
    """
    The class to glue the DocParser and DocEmitter with the
    MoinMoin current API.
    """

    # Enable caching
    caching = 1
    Dependencies = []

    def __init__(self, raw, request, **kw):
        """
        Create a minimal Parser object with required attributes.

        raw is the text to parse and request is the request object; any
        extra keyword arguments are accepted and ignored.
        """
        self.request = request
        self.form = request.form
        self.raw = raw

    def format(self, formatter):
        """
        Create and call the true parser and emitter.

        The raw text is parsed by DocParser, the resulting document is
        emitted by DocEmitter using the given formatter, and the result is
        written to the request.
        """
        document = DocParser(self.raw, self.request).parse()
        result = DocEmitter(document, formatter, self.request).emit()
        self.request.write(result)


class DocParser:
    """
    Parse the raw text and create a document object
    that can be converted into output using DocEmitter.
    """

    # The parsing rules
    #
    # Every named group <name> is dispatched by _replace() to the method
    # _<name>_repl, so the group names must match the handler names.  The
    # rules are compiled with re.X, so whitespace inside them is ignored.

    wiki_word = r'[A-Z]\w+[A-Z]\w+'

    # For the inline elements
    smiley_rule = r'''(?P<smiley>
            (?<=[\n\s])
            %s
            (?=[\s),;.?!\n])
        )''' % u'|'.join([re.escape(t) for t in config.smileys.keys()])
    wikiword_rule = r'''(?P<wikiword>
            (\/|(\.\.\/)+|^|(?<=[^!]))
            \b%s\b
            (\/%s)*
        )''' % (wiki_word, wiki_word)
    interwiki_rule = r'''\b(?P<interwiki>
            \w+:[^/][\w\/]+
        )\b'''
    interwiki2_rule = r'\[(?P<interwiki2>\w+:[^/]\S+)(\s+(?P<interwiki2_text>.+?))?\s*\]'
    # The attach groups include the scheme, because _attach_repl splits the
    # captured address at ':' into a scheme and a file name.
    attach_rule = r'\b(?P<attach>(attachment|inline|drawing):\S+?)\b'
    attach2_rule = r'\[(?P<attach2>(attachment|inline|drawing):\S+?)\s*(?P<attach2_text>.+?)\s*\]'
    anchor_rule = r'\[(?P<anchor>[#]\S+?)\s+(?P<anchor_text>.*?)\s*\]'
    url_rule = r'(?P<url>\[(?P<url_addr>(http|https|ftp|nntp|news|mailto|telnet|file|irc):[^\s\]]*)\s*(?P<url_text>.*?)\s*\])'
    link_rule = r'(?P<link>\["(?P<link_addr>.+?)"\s*(?P<link_text>.+?)?\s*\])'
    macro_rule = r'\[\[(?P<macro>.+?)(\((?P<macro_arg>.*?)\))?\]\]'
    # Fallback: any single character that no other inline rule claimed
    char_rule = r'(?P<char>.)'
    code_rule = r'(?P<code>{{{(?P<code_text>.*?)}}})'
    # Inline code in backticks and the emphasis/strong markers.  The group
    # names select the handlers _code2_repl, _emph_repl and _strong_repl.
    code2_rule = r'(?P<code2>`(?P<code2_text>.*?)`)'
    emph_rule = r'''(?P<emph>'')'''
    strong_rule = r"(?P<strong>''')"
    
    # For the block elements
    #
    # These rules are matched with re.X|re.U|re.M, so ^ and $ anchor at
    # line boundaries.  The outer group of each rule names the handler
    # (_<name>_repl); the inner groups carry the parts the handler reads.
    rule_rule = r'(?P<rule>^----+$)'
    line_rule = r'(?P<line>^\s*$)'
    # The closing run of '=' must repeat the opening one (head_head)
    head_rule = r'(?P<head>^(?P<head_head>=+)\s*(?P<head_text>[^*].*?)\s*(?P=head_head)\s*$)'
    text_rule = r'(?P<text>.+)'
    ulist_rule = r'(?P<ulist>^(?P<ulist_head>\s+[\*])\s+(?P<ulist_text>.*?)$)'
    olist_rule = r'(?P<olist>^(?P<olist_head>\s+[0-9a-zA-Z#][\.\)])\s+(?P<olist_text>.*?)$)'
    dlist_rule = r'(?P<dlist>^\s+(?P<dlist_term>.+?)\s*::\s*)'
    table_rule = r'^\s*(?P<table>(\|\|.+?)+)\|\|\s*$'
    pre_rule = r'(?P<pre>^\s*{{{\s*(\n+\s*)?(?P<pre_text>([\#]!(?P<pre_kind>\S*).*$)?(.|\n)+?)(\n)?}}}\s*$)'

    def __init__(self, raw, request):
        """
        Prepare an empty document tree and compile the parsing rules.

        raw is the text to parse.  request supplies getText (stored as
        self._) and, through get_abbr_rule(), the abbreviation dict.
        """
        self.request = request
        self._ = request.getText
        self.raw = raw

        # Parser state: the tree root, the most recent document node, and
        # the node to add inline characters to (None means "start a new one").
        self.root = DocNode("document", None)
        self.cur = self.root
        self.text = None

        # Alternatives are tried from left to right, so the order matters.
        block_names = (
            'line', 'head', 'rule', 'pre', 'ulist',
            'dlist', 'olist', 'table', 'text',
        )
        self.block_rules = '|'.join(
            [getattr(self, '%s_rule' % name) for name in block_names])
        self.block_re = re.compile(self.block_rules, re.X | re.U | re.M)

        # The abbreviation rule depends on the request, so it is built here.
        self.abbr_rule = self.get_abbr_rule()
        inline_names = (
            'link', 'anchor', 'url', 'macro', 'attach', 'attach2',
            'wikiword', 'interwiki', 'interwiki2', 'code', 'code2',
            'strong', 'emph', 'abbr', 'smiley', 'char',
        )
        self.inline_rules = '|'.join(
            [getattr(self, '%s_rule' % name) for name in inline_names])
        self.inline_re = re.compile(self.inline_rules, re.X | re.U)

    def get_abbr_rule(self):
        """
        Build the inline rule that recognizes abbreviations.

        Loads the 'AbbreviationDict' dict through self.request.dicts when it
        exists (an empty dict otherwise) and stores it in self.abbr_dict.
        Returns a regular expression whose named group "abbr" matches any
        of the dict keys, or the literal 'XXX'.
        """
        abbr_dict_page = 'AbbreviationDict'
        if self.request.dicts.has_dict(abbr_dict_page):
            self.abbr_dict = self.request.dicts.dict(abbr_dict_page)
        else:
            self.abbr_dict = {}
        # list() keeps the concatenation working when keys() is not a list.
        # 'XXX' guarantees that the alternation is never empty.
        words = list(self.abbr_dict.keys()) + ['XXX']
        alternatives = '|'.join([re.escape(word.strip()) for word in words])
        # The group must be named "abbr": _replace() dispatches on the group
        # name and _abbr_repl reads groups['abbr'].
        return r'''(^|<|(?<=[\s()'`"\[\]&-]))(?P<abbr>%s)(>|$|(?=[\s,.!?()'`":;\[\]&-]))''' % alternatives

    # copied from wiki.py
    def _getTableAttrs(self, attrdef):
        """
        Parse the "<...>" attribute prefix of a table row or cell.

        Leading "|" characters are skipped.  If what remains does not start
        with "<", ({}, '') is returned.  Otherwise the text after the "<" is
        handed to wikiutil.parseAttributes together with the table_extension
        callback below, and its (attributes, message) result is returned.
        """
        # skip "|" and initial "<"
        while attrdef and attrdef[0] == "|":
            attrdef = attrdef[1:]
        if not attrdef or attrdef[0] != "<":
            return {}, ''
        attrdef = attrdef[1:]

        # extension for special table markup
        # Callback passed to wikiutil.parseAttributes; it fills attrs for the
        # table shortcuts and returns an error message ('' when there is none).
        # wiki_parser defaults to this DocParser so that its getText function
        # (self._) is available for translating the messages.
        # NOTE(review): parser.get_token() presumably returns the next token
        # of the attribute string - confirm against wikiutil.parseAttributes.
        def table_extension(key, parser, attrs, wiki_parser=self):
            _ = wiki_parser._
            msg = ''
            if key[0] in "0123456789":
                # "<number>%" sets the width
                token = parser.get_token()
                if token != '%':
                    wanted = '%'
                    msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
                        'wanted': wanted, 'key': key, 'token': token}
                else:
                    try:
                        dummy = int(key)
                    except ValueError:
                        msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
                            'key': key, 'token': token}
                    else:
                        attrs['width'] = '"%s%%"' % key
            elif key == '-':
                # "-<integer>" sets colspan
                arg = parser.get_token()
                try:
                    dummy = int(arg)
                except ValueError:
                    msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
                        'arg': arg, 'key': key}
                else:
                    attrs['colspan'] = '"%s"' % arg
            elif key == '|':
                # "|<integer>" sets rowspan
                arg = parser.get_token()
                try:
                    dummy = int(arg)
                except ValueError:
                    msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
                        'arg': arg, 'key': key}
                else:
                    attrs['rowspan'] = '"%s"' % arg
            elif key == '(':
                attrs['align'] = '"left"'
            elif key == ':':
                attrs['align'] = '"center"'
            elif key == ')':
                attrs['align'] = '"right"'
            elif key == '^':
                attrs['valign'] = '"top"'
            elif key == 'v':
                attrs['valign'] = '"bottom"'
            elif key == '#':
                # "#" must be followed by exactly six hexadecimal digits
                arg = parser.get_token()
                try:
                    if len(arg) != 6: raise ValueError
                    dummy = int(arg, 16)
                except ValueError:
                    msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
                        'arg': arg, 'key': key}
                else:
                    attrs['bgcolor'] = '"#%s"' % arg
            elif key == '=':
                arg = parser.get_token()
                # NOTE(review): the attribute name is taken from the start of
                # the whole attrdef string (text before its first "="), not
                # from the token preceding this "=" - confirm this is intended.
                this_key = attrdef.split('=')[0]
                attrs[this_key] = arg
            else:
                # Any other key is ignored without an error message
                msg = ""
            #print "key: %s\nattrs: %s" % (key, str(attrs))
            return msg

        # scan attributes
        # NOTE(review): '>' looks like the terminator of the attribute list -
        # confirm against wikiutil.parseAttributes.
        attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
        if msg: msg = '%s' % msg
        #print attr
        return attr, msg
    def _upto(self, node, kinds):
        """
        Look up the tree to the first occurence 
        of one of the listed kinds of nodes or root.
        Start at the node node.
        """
        if not node:
            return None
        while node.parent is not None and not node.kind in kinds:
            node = node.parent
        return node

    # The _*_repl methods called for matches in regexps

    def _macro_repl(self, groups):
        """Create a macro node from the 'macro' group and store the optional 'macro_arg' group as its args."""
        macro_name = groups.get('macro')
        macro_args = groups.get('macro_arg', None)
        macro = DocNode('macro', self.cur, macro_name)
        macro.args = macro_args
        # Characters that follow must go into a new text node
        self.text = None
    _macro_arg_repl = _macro_repl

    def _wikiword_repl(self, groups):
        """Create a page link for a WikiWord; the word is both the target and the link text."""
        target = groups.get('wikiword', '')
        link = DocNode('page_link', self.cur)
        link.content = target
        # The link text is a child text node of the link
        DocNode('text', link, target)
        self.text = None
        
    def _interwiki_repl(self, groups):
        """
        Create an interwiki link node for "Wiki:Page" or "[Wiki:Page text]".

        The node content is the full "Wiki:Page" address.  The link text is
        the 'interwiki2_text' group when present, otherwise everything after
        the first colon of the address.
        """
        page = groups.get('interwiki', 'self:') or groups.get('interwiki2', 'self:')
        text = groups.get('interwiki2_text')
        node = DocNode('interwiki_link', self.cur)
        node.content = page
        # Split at the first colon only, so that a page name which itself
        # contains a colon is not truncated in the link text.
        DocNode('text', node, text or page.split(':', 1)[1])
        self.text = None
    _interwiki2_repl = _interwiki_repl
    _interwiki2_text_repl = _interwiki_repl


    def _anchor_repl(self, groups):
        """Create an anchor link node; the link text falls back to the anchor address."""
        address = groups.get('anchor')
        label = groups.get('anchor_text', '')
        if not label:
            label = address
        link = DocNode('anchor_link', self.cur, address)
        DocNode('text', link, label)
        self.text = None
    _anchor_text_repl = _anchor_repl

    def _url_repl(self, groups):
        """
        Create an external link node for a bracketed URL.

        The node stores the address as its content and the part before the
        first colon as its proto; the link text falls back to the address.
        """
        address = groups.get('url_addr', ':')
        label = groups.get('url_text', '')
        if not label:
            label = address
        link = DocNode('external_link', self.cur)
        link.content = address
        link.proto = address.split(':', 1)[0]
        DocNode('text', link, label)
        self.text = None
    _url_text_repl = _url_repl
    _url_addr_repl = _url_repl

    def _attach_repl(self, groups):
        """
        Create a node for an attachment reference.

        The address has the form "scheme:name".  The scheme becomes the node
        kind ('inline' is mapped to 'inlined_attachment') and the name its
        content.  The text is the 'attach2_text' group when present,
        otherwise the whole address.
        """
        addr = groups.get('attach') or groups.get('attach2') or ':'
        text = groups.get('attach2_text', ':')
        # Split at the first colon only: a file name may contain further
        # colons, which would otherwise break the two-value unpacking.
        scheme, name = addr.split(':', 1)
        if scheme == 'inline':
            scheme = 'inlined_attachment'
        node = DocNode(scheme, self.cur, name)
        DocNode('text', node, text or addr)
        self.text = None
    _attach2_repl = _attach_repl
    _attach2_text_repl = _attach_repl
        
    def _link_repl(self, groups):
        """Create a page link node for a ["target" text] link; the text falls back to the node content."""
        target = groups.get('link_addr', '')
        label = groups.get('link_text', '')
        if not label:
            label = ''
        label = label.strip()
        link = DocNode('page_link', self.cur, target)
        DocNode('text', link, label or link.content)
        self.text = None
    _link_addr_repl = _link_repl
    _link_text_repl = _link_repl

    def _rule_repl(self, groups):
        """Go up to the enclosing document, section or blockquote and add a rule node there."""
        container = self._upto(self.cur, ('document','section','blockquote'))
        self.cur = container
        DocNode('rule', container)

    def _table_repl(self, groups):
        """
        Add one table row, opening a new table when not already inside one.

        The row text is split at "||" into cells.  A cell that starts with
        "<" carries an attribute prefix, which is parsed by _getTableAttrs
        and removed before the rest of the cell is parsed as inline markup.
        """
        row_source = groups.get('table', '||')
        row_attrs = self._getTableAttrs(row_source)[0]

        table = self._upto(self.cur, ('table', 'document', 'section', 'blockquote'))
        if table.kind != 'table':
            table = DocNode('table', table)
            table.attrs = row_attrs
        self.cur = table

        table_row = DocNode('table_row', table)
        table_row.attrs = row_attrs

        # The text before the first "||" is not a cell
        for cell_source in row_source.split('||')[1:]:
            cell_attrs = {}
            if cell_source[:1] == '<':
                cell_attrs = self._getTableAttrs(cell_source)[0]
                # Keep only what follows the first ">"
                cell_source = '>'.join(cell_source[1:].split('>')[1:])
            cell = DocNode('table_cell', table_row)
            cell.attrs = cell_attrs
            self.cur = cell
            self.text = None
            self.parse_inline(cell_source)

        # Stay on the table so that a following row is added to it
        self.cur = table
        self.text = None

    def _dlist_repl(self, groups):
        """
        Add a term to a definition list, opening a new list when needed.

        The term text is parsed as inline markup; afterwards an empty
        definition node becomes the current node.
        """
        dlist = self._upto(self.cur, ('definition_list', 'document', 'section', 'blockquote'))
        if dlist.kind != 'definition_list':
            dlist = DocNode('definition_list', dlist)

        self.cur = DocNode('term', dlist)
        self.text = None
        self.parse_inline(groups.get('dlist_term', u''))

        self.cur = DocNode('definition', dlist)
        self.text = None
    _dlist_term_repl = _dlist_repl
        
    def _ulist_repl(self, groups):
        """
        Add an item to a bullet list.

        The item joins the nearest enclosing bullet list that has the same
        bullet (indentation included).  When there is none below the next
        document, section or blockquote, a new list level is opened.
        """
        marker = groups.get('ulist_head', '')
        stop_kinds = ('document', 'section', 'blockquote')

        # Find a list with the same bullet up the tree
        candidate = self.cur
        while candidate:
            if candidate.kind == 'bullet_list' and candidate.bullet == marker:
                break
            if candidate.kind in stop_kinds:
                break
            candidate = candidate.parent

        if candidate and candidate.kind == 'bullet_list':
            target = candidate
        else:
            # Create a new level of list
            holder = self._upto(self.cur, ('list_item', 'document', 'section', 'blockquote'))
            target = DocNode('bullet_list', holder)
            target.bullet = marker

        self.cur = DocNode('list_item', target)
        self.parse_inline(groups.get('ulist_text', ''))
        self.text = None
    _ulist_text_repl=_ulist_repl
    _ulist_head_repl=_ulist_repl

    def _olist_repl(self, groups):
        """
        Add an item to a numbered list.

        The list marker is normalized first (digits to '0', lowercase
        letters to 'a', uppercase letters to 'A'), so items of one list
        compare equal whatever their number.  The item joins the nearest
        enclosing numbered list with the same normalized marker; when there
        is none, a new list level is opened.
        """
        marker = groups.get('olist_head', '')
        # Normalize the list number
        for pattern, replacement in ((r'[0-9]', '0'), (r'[a-z]', 'a'), (r'[A-Z]', 'A')):
            marker = re.sub(pattern, replacement, marker)

        # Find a list with the same bullet up the tree
        stop_kinds = ('document', 'section', 'blockquote')
        candidate = self.cur
        while candidate:
            if candidate.kind == 'number_list' and candidate.bullet == marker:
                break
            if candidate.kind in stop_kinds:
                break
            candidate = candidate.parent

        if candidate and candidate.kind == 'number_list':
            target = candidate
        else:
            # Create a new level of list
            holder = self._upto(self.cur, ('list_item', 'document', 'section', 'blockquote'))
            target = DocNode('number_list', holder)
            target.bullet = marker

        self.cur = DocNode('list_item', target)
        self.parse_inline(groups.get('olist_text', ''))
        self.text = None
    _olist_text_repl=_olist_repl
    _olist_head_repl=_olist_repl

    def _head_repl(self, groups):
        """Add a header node; its level is the length of the opening run of '=' characters."""
        container = self._upto(self.cur, ('document','section', 'blockquote'))
        self.cur = container
        title = groups.get('head_text', '').strip()
        header = DocNode('header', container, title)
        header.level = len(groups.get('head_head', ' '))
    _head_head_repl = _head_repl
    _head_text_repl = _head_repl
   
    def _text_repl(self, groups):
        """
        Add a line of plain text to the current node.

        Text directly inside a list or table closes that structure, and text
        directly inside a document, section or blockquote opens a paragraph.
        A space is appended to the line before it is parsed as inline markup.
        """
        containers = ('document','section', 'blockquote')
        no_text_kinds = ('number_list', 'bullet_list', 'definition_list', 'table', 'table_row')
        # No text allowed in those nodes
        if self.cur.kind in no_text_kinds:
            self.cur = self._upto(self.cur, containers)
        # Those nodes can have text, but only in paragraphs
        if self.cur.kind in containers:
            self.cur = DocNode('paragraph', self.cur)
        line = groups.get('text', '')
        self.parse_inline(line + ' ')
        self.text = None

    def _pre_repl(self, groups):
        """Add a preformatted node; the word after "#!" (or '' when absent) is stored as its sect."""
        container = self._upto(self.cur, ('document','section','blockquote'))
        self.cur = container
        block = DocNode('preformatted', container, groups.get('pre_text', u''))
        block.sect = groups.get('pre_kind', None) or ''
        self.text = None
    _pre_text_repl = _pre_repl
    _pre_head_repl = _pre_repl
    _pre_kind_repl = _pre_repl
    
    def _line_repl(self, groups):
        """Handle an empty line: go back up to the enclosing document, section or blockquote."""
        container = self._upto(self.cur, ('document','section','blockquote'))
        self.cur = container

    def _code_repl(self, groups):
        """Add an inline code node from either the {{{...}}} or the backtick form."""
        content = groups.get('code_text', u'')
        if not content:
            content = groups.get('code2_text', u'')
        DocNode('code', self.cur, content)
        self.text = None
    _code_text_repl = _code_repl
    _code2_text_repl = _code_repl
    _code2_repl = _code_repl

    def _emph_repl(self, groups):
        """
        Toggle emphasis: leave the enclosing emphasis node when there is
        one, otherwise open a new emphasis node in the current node.
        """
        enclosing = self._upto(self.cur, ('emphasis','document'))
        if enclosing.kind != 'emphasis':
            self.cur = DocNode('emphasis', self.cur)
        else:
            self.cur = enclosing.parent or self.root
        self.text = None
    _emph_open_repl = _emph_repl
    _emph_close_repl = _emph_repl
  
    def _strong_repl(self, groups):
        """
        Toggle strong text: leave the enclosing strong node when there is
        one, otherwise open a new strong node in the current node.
        """
        enclosing = self._upto(self.cur, ('strong','document'))
        if enclosing.kind != 'strong':
            self.cur = DocNode('strong', self.cur)
        else:
            self.cur = enclosing.parent or self.root
        self.text = None
    _strong_open_repl = _strong_repl
    _strong_close_repl = _strong_repl
  
    def _smiley_repl(self, groups):
        """Add a smiley node whose content is the matched smiley text."""
        word = groups.get('smiley', '')
        DocNode('smiley', self.cur, word)
        # Start a new text node after the smiley, as every other inline
        # handler does; otherwise _char_repl keeps appending the following
        # characters to the text node created before the smiley.
        self.text = None
        
    def _abbr_repl(self, groups):
        """Add an abbreviation node; its title is looked up in self.abbr_dict ('???' when missing)."""
        word = groups.get('abbr', '')
        explanation = self.abbr_dict.get(word, '???')
        abbreviation = DocNode('abbr', self.cur, word)
        abbreviation.title = explanation
        self.text = None
  
    def _char_repl(self, groups):
        """Append one plain character to the current text node, creating that node when there is none."""
        char = groups.get('char', u'')
        target = self.text
        if target is None:
            target = DocNode('text', self.cur, u'')
            self.text = target
        target.content += char
 
    def _replace(self, match):
        """Invoke appropriate _*_repl method. Called for every matched group."""
        groups = match.groupdict()
        for name,text in groups.iteritems():
            if text is not None:
                replace = getattr(self, '_%s_repl'%name)
                replace(groups)
                return

    def parse_inline(self, raw):
        """Run the inline rules over raw, calling _replace for every match."""
        # Only the callbacks matter; the substituted string is discarded
        self.inline_re.sub(self._replace, raw)
    
    def parse_block(self, raw):
        """Run the block rules over raw, calling _replace for every match."""
        # Only the callbacks matter; the substituted string is discarded
        self.block_re.sub(self._replace, raw)
    
    def parse(self):
        """Parse self.raw with the block rules and return the root node of the document."""
        source = self.raw
        self.parse_block(source)
        document = self.root
        return document