# -*- coding: iso-8859-1 -*-

import re
from MoinMoin import config
from MoinMoin import wikiutil
from emitter import DocEmitter
from document import DocNode

# Module-level dependency list; empty for this parser.
# NOTE(review): presumably consulted by MoinMoin's caching machinery - confirm.
Dependencies = []

class Parser:
    """
    Adapter that exposes the DocParser / DocEmitter pair through the
    parser interface of the current MoinMoin API.
    """
    # Enable caching
    caching = 1
    Dependencies = []

    def __init__(self, raw, request, **kw):
        """
        Remember the raw page text and the request.

        Additional keyword arguments are accepted and ignored.
        """
        self.request = request
        self.form = request.form
        self.raw = raw

    def format(self, formatter):
        """
        Parse the raw text into a document tree, emit it with the given
        formatter and write the result to the request.
        """
        tree = DocParser(self.raw, self.request).parse()
        emitter = DocEmitter(tree, formatter, self.request)
        self.request.write(emitter.emit())

class DocParser:
    """
    Parse the raw text and create a document object
    that can be converted into output using DocEmitter.
    """

    # Table of character sequences to replace with entities: (no such table is defined in this class)

    # The parsing rules

    # Two capitalised word parts run together, e.g. "WikiWord".
    wiki_word = r'[A-Z]\w+[A-Z]\w+'

    # For the inline elements.
    # These pattern sources are joined into one alternation and compiled with
    # re.X, so unescaped whitespace inside them is ignored by the regex engine.

    # A smiley from config.smileys. The alternatives get a group of their own
    # so that the look-around conditions apply to every smiley, not only to
    # the first (look-behind) and last (look-ahead) alternative. A smiley is
    # accepted after whitespace or at the very start of the text.
    smiley_rule = r'''(?P<smiley>
        (?: ^ | (?<=[\n\s]) ) (?: %s ) (?=[\s),;.?!\n])
    )''' % u'|'.join([re.escape(t) for t in config.smileys.keys()])
    # A wiki word, optionally with a leading "/" or "../" and "/SubPage" parts.
    wikiword_rule = r'''(?P<wikiword>
        (\/|(\.\.\/)+|^|(?<=[^!])) \b%s\b (\/%s)*
    )'''%(wiki_word, wiki_word)
    # Bare "Wiki:Page" and bracketed "[Wiki:Page text]" interwiki links.
    interwiki_rule = r'''\b(?P<interwiki>
        \w+:[^/][\w\/]+
    )\b'''
    interwiki2_rule = r'\[(?P<interwiki2>\w+:[^/]\S+)(\s+(?P<interwiki2_text>.+?))?\s*\]'
    # Bare "attachment:name" and bracketed "[attachment:name text]" links.
    # The named group holds scheme and name together ("scheme:name"), which is
    # the form _attach_repl splits on ":". The name is matched greedily so
    # that "file.txt" is not cut at the first word boundary, and in the
    # bracketed form it ends at whitespace or "]" so that the link text
    # (which may be empty) is kept separate.
    attach_rule = r'\b(?P<attach>(attachment|inline|drawing):\S+)\b'
    attach2_rule = r'\[(?P<attach2>(attachment|inline|drawing):[^\s\]]+)\s*(?P<attach2_text>.*?)\s*\]'
    # "[#anchor text]"
    anchor_rule = r'\[(?P<anchor>[#]\S+?)\s+(?P<anchor_text>.*?)\s*\]'
    # "[scheme:address text]" for the listed URL schemes
    url_rule = r'(?P<url>\[(?P<url_addr>(http|https|ftp|nntp|news|mailto|telnet|file|irc):[^\s\]]*)\s*(?P<url_text>.*?)\s*\])'
    # '["address" text]'
    link_rule = r'(?P<link>\["(?P<link_addr>.+?)"\s*(?P<link_text>.+?)?\s*\])'
    # "[[Macro]]" or "[[Macro(args)]]"
    macro_rule = r'\[\[(?P<macro>.+?)(\((?P<macro_arg>.*?)\))?\]\]'

    # Any single character; the fallback when no other inline rule applies.
    char_rule = r'(?P<char>.)'
    # Inline code: "{{{code}}}" and "`code`"
    code_rule = r'(?P<code>{{{(?P<code_text>.*?)}}})'
    code2_rule = r'(?P<code2>`(?P<code2_text>.*?)`)'
    # Two quotes toggle emphasis, three toggle strong.
    emph_rule = r'''(?P<emph>'')'''
    strong_rule = r"(?P<strong>''')"
    
    # For the block elements.
    # Every rule carries a named group; the parser dispatches on that name
    # to the matching _<name>_repl method.

    # A horizontal rule: a line consisting of four or more dashes.
    rule_rule = r'(?P<rule>^----+$)'
    # An empty or whitespace-only line.
    line_rule = r'(?P<line>^\s*$)'
    # A heading: the same run of "=" before and after the heading text
    # (enforced by the back-reference); the text may not start with "*".
    head_rule = r'(?P<head>^(?P<head_head>=+)\s*(?P<head_text>[^*].*?)\s*(?P=head_head)\s*$)'
    # Any non-empty run of characters; the catch-all for ordinary text.
    text_rule = r'(?P<text>.+)'
    # A bullet item: leading whitespace, "*", whitespace, then the item text.
    # The head group includes the leading whitespace.
    ulist_rule = r'(?P<ulist>^(?P<ulist_head>\s+[\*])\s+(?P<ulist_text>.*?)$)'
    # A numbered item: leading whitespace, one digit, letter or "#", followed
    # by "." or ")", whitespace, then the item text.
    olist_rule = r'(?P<olist>^(?P<olist_head>\s+[0-9a-zA-Z#][\.\)])\s+(?P<olist_text>.*?)$)'
    # A definition list term: leading whitespace, the term, then "::".
    dlist_rule = r'(?P<dlist>^\s+(?P<dlist_term>.+?)\s*::\s*)'
    # A table row: "||"-introduced cells on one line, closed by a final "||"
    # that is not part of the captured group.
    table_rule = r'^\s*(?P<table>(\|\|.+?)+)\|\|\s*$'
    # A preformatted block between "{{{" and "}}}", which may span lines.
    # An optional leading "#!kind" line is part of pre_text; the kind itself
    # is also captured as pre_kind.
    pre_rule = r'(?P<pre>^\s*{{{\s*(\n+\s*)?(?P<pre_text>([\#]!(?P<pre_kind>\S*).*$)?(.|\n)+?)(\n)?}}}\s*$)'

    def __init__(self, raw, request):
        """
        Set up an empty document tree and compile the block and inline
        regular expressions.

        raw is the text to parse; request supplies getText and, through
        get_abbr_rule, the abbreviation dictionary.
        """
        self.request = request
        self._ = request.getText
        self.raw = raw
        self.root = DocNode("document", None)
        self.cur = self.root        # The most recent document node
        self.text = None            # The node to add inline characters to

        # Order matters: earlier alternatives win, text_rule is the fallback.
        block_order = (
            self.line_rule,
            self.head_rule,
            self.rule_rule,
            self.pre_rule,
            self.ulist_rule,
            self.dlist_rule,
            self.olist_rule,
            self.table_rule,
            self.text_rule,
        )
        self.block_rules = '|'.join(block_order)
        self.block_re = re.compile(self.block_rules, re.X | re.U | re.M)

        # The abbreviation rule depends on the request, so it is built here.
        self.abbr_rule = self.get_abbr_rule()
        # Order matters here as well: char_rule is the fallback.
        inline_order = (
            self.link_rule,
            self.anchor_rule,
            self.url_rule,
            self.macro_rule,
            self.attach_rule,
            self.attach2_rule,
            self.wikiword_rule,
            self.interwiki_rule,
            self.interwiki2_rule,
            self.code_rule,
            self.code2_rule,
            self.strong_rule,
            self.emph_rule,
            self.abbr_rule,
            self.smiley_rule,
            self.char_rule,
        )
        self.inline_rules = '|'.join(inline_order)
        self.inline_re = re.compile(self.inline_rules, re.X | re.U)

    def get_abbr_rule(self):
        """
        Load the abbreviation dictionary and build the abbreviation rule.

        Sets self.abbr_dict to the dict of the 'AbbreviationDict' page, or to
        an empty dict when the request knows no such dict.

        Returns the source of a regular expression whose named group 'abbr'
        matches one of the (stripped) dictionary keys, or the literal 'XXX',
        when it is delimited by the listed punctuation, whitespace, "<" / ">"
        or the start / end of the text.
        """
        abbr_dict_page = 'AbbreviationDict'
        if self.request.dicts.has_dict(abbr_dict_page):
            self.abbr_dict = self.request.dicts.dict(abbr_dict_page)
        else:
            self.abbr_dict = {}
        # Drop keys that are blank after stripping: an empty alternative would
        # let the 'abbr' group match the empty string.
        words = [k.strip() for k in self.abbr_dict.keys()]
        words = [re.escape(w) for w in words if w]
        # 'XXX' is always appended, so the alternation is never empty.
        words.append('XXX')
        return r'''(^|<|(?<=[\s()'`"\[\]&-]))(?P<abbr>%s)(>|$|(?=[\s,.!?()'`":;\[\]&-]))''' % '|'.join(words)

    # copied from wiki.py
    def _getTableAttrs(self, attrdef):
        """
        Parse the "<...>" attribute prefix of a table row or cell.

        Leading "|" characters are skipped. If what remains does not start
        with "<", nothing is parsed and ({}, '') is returned. Otherwise the
        text after "<" is passed to wikiutil.parseAttributes with ">" as the
        end token and a callback that understands the table shorthands.

        Returns a tuple (attrs, msg). A non-empty msg is wrapped in
        <strong class="highlight"> markup.
        """
        # skip "|" and initial "<"
        if attrdef:
            attrdef = attrdef.lstrip("|")
        if not (attrdef and attrdef.startswith("<")):
            return {}, ''
        spec = attrdef[1:]

        # shorthands that take an integer argument
        spans = {'-': 'colspan', '|': 'rowspan'}
        # shorthands that map directly to one attribute value
        fixed = {
            '(': ('align', '"left"'),
            ':': ('align', '"center"'),
            ')': ('align', '"right"'),
            '^': ('valign', '"top"'),
            'v': ('valign', '"bottom"'),
        }

        # extension for special table markup
        def table_extension(key, parser, attrs, wiki_parser=self):
            _ = wiki_parser._
            msg = ''
            if key[0] in "0123456789":
                # "NN%" gives the width
                token = parser.get_token()
                if token == '%':
                    try:
                        int(key)
                    except ValueError:
                        msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
                            'key': key, 'token': token}
                    else:
                        attrs['width'] = '"%s%%"' % key
                else:
                    msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
                        'wanted': '%', 'key': key, 'token': token}
            elif key in spans:
                # "-N" is a column span, "|N" a row span
                arg = parser.get_token()
                try:
                    int(arg)
                except ValueError:
                    msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
                        'arg': arg, 'key': key}
                else:
                    attrs[spans[key]] = '"%s"' % arg
            elif key in fixed:
                name, value = fixed[key]
                attrs[name] = value
            elif key == '#':
                # "#rrggbb" is the background colour
                arg = parser.get_token()
                try:
                    if len(arg) != 6:
                        raise ValueError
                    int(arg, 16)
                except ValueError:
                    msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
                        'arg': arg, 'key': key}
                else:
                    attrs['bgcolor'] = '"#%s"' % arg
            elif key == '=':
                # The attribute name is whatever precedes the first "=" in
                # the whole attribute text.
                arg = parser.get_token()
                attrs[spec.split('=')[0]] = arg
            # any other token leaves msg empty
            return msg

        # scan attributes
        attr, msg = wikiutil.parseAttributes(self.request, spec, '>', table_extension)
        if msg:
            msg = '<strong class="highlight">%s</strong>' % msg
        return attr, msg
    def _upto(self, node, kinds):
        """
        Walk from node towards the root and return the first node whose
        kind is listed in kinds. If no such node exists the root (the node
        without a parent) is returned. A false node yields None.
        """
        if not node:
            return None
        current = node
        while current.parent is not None:
            if current.kind in kinds:
                break
            current = current.parent
        return current

    # The _*_repl methods called for matches in regexps

    def _macro_repl(self, groups):
        """Add a macro node and record the macro's argument string on it."""
        name = groups.get('macro')
        macro_node = DocNode('macro', self.cur, name)
        macro_node.args = groups.get('macro_arg', None)
        # Following characters must start a new text node.
        self.text = None
    _macro_arg_repl = _macro_repl

    def _wikiword_repl(self, groups):
        """Add a page link whose target and visible text are the wiki word."""
        name = groups.get('wikiword', '')
        link = DocNode('page_link', self.cur)
        link.content = name
        DocNode('text', link, name)
        # Following characters must start a new text node.
        self.text = None
        
    def _interwiki_repl(self, groups):
        """
        Add an interwiki link for "Wiki:Page" or "[Wiki:Page text]".

        The link target is the whole "Wiki:Page" string. The visible text is
        the explicit text of the bracketed form or, without one, everything
        after the first colon of the target.
        """
        # groupdict() holds None for groups that did not take part in the
        # match, so the fallbacks have to be chained with "or".
        page = groups.get('interwiki') or groups.get('interwiki2') or 'self:'
        text = groups.get('interwiki2_text')
        node = DocNode('interwiki_link', self.cur)
        node.content = page
        # Split on the first colon only: a bracketed page name may contain
        # further colons, which belong to the page name.
        DocNode('text', node, text or page.split(':', 1)[1])
        self.text = None
    _interwiki2_repl = _interwiki_repl
    _interwiki2_text_repl = _interwiki_repl


    def _anchor_repl(self, groups):
        """Add an anchor link; without link text the anchor itself is shown."""
        target = groups.get('anchor')
        label = groups.get('anchor_text', '') or target
        link = DocNode('anchor_link', self.cur, target)
        DocNode('text', link, label)
        # Following characters must start a new text node.
        self.text = None
    _anchor_text_repl = _anchor_repl

    def _url_repl(self, groups):
        """
        Add an external link for "[scheme:address text]".

        The node records the full address and, separately, the part before
        the first colon as its protocol. Without link text the address is
        shown.
        """
        target = groups.get('url_addr', ':')
        scheme = target.split(':')[0]
        label = groups.get('url_text', '') or target
        link = DocNode('external_link', self.cur)
        link.content = target
        link.proto = scheme
        DocNode('text', link, label)
        # Following characters must start a new text node.
        self.text = None
    _url_text_repl = _url_repl
    _url_addr_repl = _url_repl

    def _attach_repl(self, groups):
        """
        Add an attachment, inlined attachment or drawing node.

        The address comes from the 'attach' or 'attach2' group and is read
        as "scheme:name". The scheme becomes the node kind ('inline' is
        mapped to 'inlined_attachment'), the name its content. The visible
        text is the 'attach2_text' group or, if that is empty, the address.
        """
        addr = groups.get('attach') or groups.get('attach2') or ':'
        text = groups.get('attach2_text', ':')
        # Split on the first colon only: the name itself may contain colons.
        parts = addr.split(':', 1)
        if len(parts) == 2:
            scheme, name = parts
        else:
            # No scheme prefix in the captured address: treat it as a plain
            # attachment instead of failing on the tuple unpacking.
            scheme, name = 'attachment', addr
        if scheme == 'inline':
            scheme = 'inlined_attachment'
        node = DocNode(scheme, self.cur, name)
        DocNode('text', node, text or addr)
        self.text = None
    _attach2_repl = _attach_repl
    _attach2_text_repl = _attach_repl
        
    def _link_repl(self, groups):
        """
        Add a page link for the '["address" text]' form.

        Without link text the node's content is shown instead.
        """
        target = groups.get('link_addr', '')
        label = groups.get('link_text', '')
        if label:
            label = label.strip()
        else:
            label = ''
        link = DocNode('page_link', self.cur, target)
        DocNode('text', link, label or link.content)
        # Following characters must start a new text node.
        self.text = None
    _link_addr_repl = _link_repl
    _link_text_repl = _link_repl

    def _rule_repl(self, groups):
        """Close open blocks and add a horizontal rule to the container."""
        containers = ('document', 'section', 'blockquote')
        self.cur = self._upto(self.cur, containers)
        DocNode('rule', self.cur)

    def _table_repl(self, groups):
        """
        Add one table row, opening a new table when none is open.

        Attributes parsed from the start of the row are stored on the row
        and, for a newly opened table, on the table too. Each "||"-separated
        cell becomes a table_cell node whose text is parsed for inline
        elements; a cell may begin with its own "<...>" attribute prefix.
        Attribute error messages are not used.
        """
        row_text = groups.get('table', '||')
        row_attrs = self._getTableAttrs(row_text)[0]
        self.cur = self._upto(self.cur, ('table', 'document', 'section', 'blockquote'))
        if self.cur.kind != 'table':
            self.cur = DocNode('table', self.cur)
            self.cur.attrs = row_attrs
        table = self.cur
        row_node = DocNode('table_row', table)
        row_node.attrs = row_attrs
        # The row starts with "||", so the first split item is skipped.
        for cell_text in row_text.split('||')[1:]:
            if cell_text.startswith('<'):
                cell_attrs = self._getTableAttrs(cell_text)[0]
                # Keep only what follows the first ">" (nothing if there is none).
                cell_text = ''.join(cell_text[1:].split('>', 1)[1:])
            else:
                cell_attrs = {}
            self.cur = DocNode('table_cell', row_node)
            self.cur.attrs = cell_attrs
            self.text = None
            self.parse_inline(cell_text)
        # Stay inside the table so that a following row is added to it.
        self.cur = table
        self.text = None

    def _dlist_repl(self, groups):
        """
        Add a term to the open definition list (opening one if needed) and
        leave an empty definition node as the current node.
        """
        kinds = ('definition_list', 'document', 'section', 'blockquote')
        self.cur = self._upto(self.cur, kinds)
        if self.cur.kind != 'definition_list':
            self.cur = DocNode('definition_list', self.cur)
        deflist = self.cur
        # The term text may contain inline markup.
        self.cur = DocNode('term', deflist)
        self.text = None
        self.parse_inline(groups.get('dlist_term', u''))
        # Text that follows belongs to the definition.
        self.cur = DocNode('definition', deflist)
        self.text = None
    _dlist_term_repl = _dlist_repl
        
    def _ulist_repl(self, groups):
        """
        Add a bullet list item.

        The item goes into the nearest enclosing bullet list that was opened
        with the same head (leading whitespace plus "*"). If there is none
        below the next document, section or blockquote, a new list is opened.
        """
        bullet = groups.get('ulist_head', '')
        containers = ('document', 'section', 'blockquote')
        # Find a list with the same bullet up the tree
        found = self.cur
        while found:
            if found.kind == 'bullet_list' and found.bullet == bullet:
                break
            if found.kind in containers:
                break
            found = found.parent
        if found and found.kind == 'bullet_list':
            self.cur = found
        else:
            # Create a new level of list
            self.cur = self._upto(self.cur, ('list_item',) + containers)
            self.cur = DocNode('bullet_list', self.cur)
            self.cur.bullet = bullet
        self.cur = DocNode('list_item', self.cur)
        self.parse_inline(groups.get('ulist_text', ''))
        self.text = None
    _ulist_text_repl=_ulist_repl
    _ulist_head_repl=_ulist_repl

    def _olist_repl(self, groups):
        """
        Add a numbered list item.

        The item head is normalized first (digits to "0", lower case letters
        to "a", upper case letters to "A"), so items of one list share one
        head whatever their number. The item goes into the nearest enclosing
        numbered list with that head; if there is none below the next
        document, section or blockquote, a new list is opened.
        """
        bullet = groups.get('olist_head', '')
        # Normalize the list number
        for pattern, stand_in in ((r'[0-9]', '0'), (r'[a-z]', 'a'), (r'[A-Z]', 'A')):
            bullet = re.sub(pattern, stand_in, bullet)
        containers = ('document', 'section', 'blockquote')
        # Find a list with the same bullet up the tree
        found = self.cur
        while found:
            if found.kind == 'number_list' and found.bullet == bullet:
                break
            if found.kind in containers:
                break
            found = found.parent
        if found and found.kind == 'number_list':
            self.cur = found
        else:
            # Create a new level of list
            self.cur = self._upto(self.cur, ('list_item',) + containers)
            self.cur = DocNode('number_list', self.cur)
            self.cur.bullet = bullet
        self.cur = DocNode('list_item', self.cur)
        self.parse_inline(groups.get('olist_text', ''))
        self.text = None
    _olist_text_repl=_olist_repl
    _olist_head_repl=_olist_repl

    def _head_repl(self, groups):
        """
        Close open blocks and add a header node; its level is the number
        of "=" characters that introduce the heading.
        """
        self.cur = self._upto(self.cur, ('document', 'section', 'blockquote'))
        title = groups.get('head_text', '').strip()
        header = DocNode('header', self.cur, title)
        header.level = len(groups.get('head_head', ' '))
    _head_head_repl = _head_repl
    _head_text_repl = _head_repl
   
    def _text_repl(self, groups):
        """
        Add a line of ordinary text to the current node.

        Lists and tables cannot hold text directly, so they are left first;
        directly inside a document, section or blockquote a new paragraph is
        opened. A blank is appended to the line before it is parsed for
        inline elements.
        """
        containers = ('document', 'section', 'blockquote')
        textless = ('number_list', 'bullet_list', 'definition_list', 'table', 'table_row')
        # No text allowed in those nodes
        if self.cur.kind in textless:
            self.cur = self._upto(self.cur, containers)
        # Those nodes can have text, but only in paragraphs
        if self.cur.kind in containers:
            self.cur = DocNode('paragraph', self.cur)
        line = groups.get('text', '') + ' '
        self.parse_inline(line)
        self.text = None

    def _pre_repl(self, groups):
        """
        Close open blocks and add a preformatted node with the block's text;
        the kind given on a "#!" line (or '') is stored as its sect.
        """
        self.cur = self._upto(self.cur, ('document', 'section', 'blockquote'))
        section_kind = groups.get('pre_kind', None) or ''
        pre_node = DocNode('preformatted', self.cur, groups.get('pre_text', u''))
        pre_node.sect = section_kind
        self.text = None
    _pre_text_repl = _pre_repl
    _pre_head_repl = _pre_repl
    _pre_kind_repl = _pre_repl
    
    def _line_repl(self, groups):
        """A blank line closes every open block up to the enclosing container."""
        containers = ('document', 'section', 'blockquote')
        self.cur = self._upto(self.cur, containers)

    def _code_repl(self, groups):
        """
        Add an inline code node for "{{{code}}}" or "`code`".

        The node content is always a string; empty code gives u''.
        """
        # groupdict() holds None for the form that did not match, so finish
        # the chain with u'' to keep the content a string for empty code.
        text = groups.get('code_text') or groups.get('code2_text') or u''
        DocNode('code', self.cur, text)
        self.text = None
    _code_text_repl = _code_repl
    _code2_text_repl = _code_repl
    _code2_repl = _code_repl

    def _emph_repl(self, groups):
        """
        Toggle emphasis: inside an emphasis node continue in its parent,
        otherwise open a new emphasis node below the current node.
        """
        enclosing = self._upto(self.cur, ('emphasis', 'document'))
        if enclosing.kind != 'emphasis':
            self.cur = DocNode('emphasis', self.cur)
        else:
            self.cur = enclosing.parent or self.root
        # Following characters must start a new text node.
        self.text = None
    _emph_open_repl = _emph_repl
    _emph_close_repl = _emph_repl
  
    def _strong_repl(self, groups):
        """
        Toggle strong text: inside a strong node continue in its parent,
        otherwise open a new strong node below the current node.
        """
        enclosing = self._upto(self.cur, ('strong', 'document'))
        if enclosing.kind != 'strong':
            self.cur = DocNode('strong', self.cur)
        else:
            self.cur = enclosing.parent or self.root
        # Following characters must start a new text node.
        self.text = None
    _strong_open_repl = _strong_repl
    _strong_close_repl = _strong_repl
  
    def _smiley_repl(self, groups):
        """Add a smiley node holding the matched smiley text."""
        word = groups.get('smiley', '')
        DocNode('smiley', self.cur, word)
        # Start a new text node afterwards. Otherwise the characters after
        # the smiley would be appended to the text node before it, and the
        # smiley would end up behind text that follows it in the source.
        self.text = None
        
    def _abbr_repl(self, groups):
        """
        Add an abbreviation node; its title is the dictionary entry for
        the abbreviation, or '???' when there is none.
        """
        word = groups.get('abbr', '')
        abbr_node = DocNode('abbr', self.cur, word)
        abbr_node.title = self.abbr_dict.get(word, '???')
        # Following characters must start a new text node.
        self.text = None
  
    def _char_repl(self, groups):
        """
        Append one plain character to the current text node, creating the
        text node below the current node first if there is none.
        """
        target = self.text
        if target is None:
            target = self.text = DocNode('text', self.cur, u'')
        target.content += groups.get('char', u'')
 
    def _replace(self, match):
        """
        Dispatch a match to its handler.

        The first named group found with a value selects the method
        _<group name>_repl, which is called with all named groups. Handlers
        are therefore also reachable under the names of their sub-groups.
        """
        groups = match.groupdict()
        for name in groups:
            if groups[name] is None:
                continue
            handler = getattr(self, '_%s_repl' % name)
            handler(groups)
            return

    def parse_inline(self, raw):
        """
        Run the inline rules over raw; every match is handed to _replace.
        The substituted string itself is discarded.
        """
        self.inline_re.sub(self._replace, raw)
    
    def parse_block(self, raw):
        """
        Run the block rules over raw; every match is handed to _replace.
        The substituted string itself is discarded.
        """
        self.block_re.sub(self._replace, raw)
    
    def parse(self):
        """Parse the raw text block by block and return the document root."""
        source = self.raw
        self.parse_block(source)
        return self.root

