# -*- coding: iso-8859-1 -*-

import re
from MoinMoin import config, wikiutil

from document import DocNode
from emitter import DocEmitter

Dependencies = []

class Parser:
    """
    Adapter that exposes the DocParser/DocEmitter pair through the
    parser interface of the current MoinMoin API.
    """
    # Class-level settings; caching is switched on
    Dependencies = []
    caching = 1

    def __init__(self, raw, request, **kw):
        """
        Remember the raw text and the request (plus its form).

        Extra keyword arguments are accepted and ignored.
        """
        self.form = request.form
        self.request = request
        self.raw = raw

    def format(self, formatter):
        """Parse the raw text, emit it with formatter and write the result."""
        req = self.request
        tree = DocParser(self.raw, req).parse()
        emitter = DocEmitter(tree, formatter, req)
        req.write(emitter.emit())

class DocParser:
    """
    Parse the raw text and create a document object
    that can be converted into output using DocEmitter.
    """

    # Maps character sequences found in the text to the Unicode character(s)
    # that _typo_repl substitutes for them:
    typo_tab = {
        ' -- ': u' \u2013 ',
        ' --- ': u' \u2014 ',
        '...': u'\u2026',
        '(c)': u'\u00A9',
        '(C)': u'\u00A9',
        '(R)': u'\u00AE',
        '-8<-': u'\u2702',
        '[o ': u'\u260e ',
        '--> ': u'\u261e ',
        '8=X ': u'\u2620 ',
        'd~': u'\u266a',
        '~o': u'\u00B0',
        '[ ]': u'\u2610',
        '[v]': u'\u2611',
        '[x]': u'\u2612',
    }

    # The parsing rules
    #
    # Every pattern below is compiled with re.X (see __init__), so literal
    # whitespace inside a pattern is ignored and a literal '#' has to be
    # written as [\#].

    # For the inline elements
    #
    # Pattern bodies keyed by rule name; _get_inline_rule wraps a body in a
    # named group of the same name. The sub-group names (link_target,
    # emph_open, ...) are what the _*_repl handlers read from the match.
    #
    # NOTE(review): in 'smiley' the '|'-joined alternatives are not grouped,
    # so the lookbehind binds only to the first alternative and the lookahead
    # only to the last one.

    inline_tab = {
        'typo': r'%s' % '|'.join([re.escape(t) for t in typo_tab.keys()]),
        'smiley': r'(?<=[\n\s])%s(?=[\s),;.?!\n])' % '|'.join([re.escape(t) for t in config.smileys.keys()]),
        'link': r'\[\[(?P<link_target>.+?)\s*(\|\s*(?P<link_text>.+?)\s*)?]]',
        'char': r'.',
        'code': r'(?P<code_head>`+)(?P<code_text>.*?)(?P=code_head)',
        'quote': r"(?P<quote_open>(?<=\s),,|^,,)|(?P<quote_close>''(?=['\s.,;!?\)\]\}]|$))",
        'emph': r'(?P<emph_open>(?<=\s)\*|^\*)|(?P<emph_close>\*(?=[\s.,;!?\)\]\}]|$))',
        'strong': r'(?P<strong_open>(?<=\s)\*\*|^\*\*)|(?P<strong_close>\*\*(?=[\s.,;!?\)\]\}]|$))',
        'strongemph': r'(?P<strongemph_open>(?<=\s)\*\*\*|^\*\*\*)|(?P<strongemph_close>\*\*\*(?=[\n\s.,;!?\)\]\}]|$))'
    }
    

    # The same inline patterns, already wrapped in their named groups.
    # __init__ builds inline_re from inline_tab instead, so none of these
    # inline *_rule strings is referenced elsewhere in this class.
    typo_rule = r'(?P<typo>%s)' % '|'.join([re.escape(t) for t in typo_tab.keys()])
    smiley_rule = r'(?P<smiley>(?<=[\n\s])%s(?=[\s),;.?!\n]))' % u'|'.join([re.escape(t) for t in config.smileys.keys()])
    link_rule = r'(?P<link>\[\[(?P<link_target>.+?)\s*(\|\s*(?P<link_text>.+?)\s*)?]])'
    char_rule = r'(?P<char>.)'
    code_rule = r'(?P<code>(?P<code_head>`+)(?P<code_text>.*?)(?P=code_head))'
    quote_rule = r'''(?P<quote>(?P<quote_open>(?<=\s),,|^,,)|(?P<quote_close>''(?=['\s.,;!?\)\]\}]|$)))'''
    emph_rule = r'(?P<emph>(?P<emph_open>(?<=\s)\*|^\*)|(?P<emph_close>\*(?=[\s.,;!?\)\]\}]|$)))'
    strong_rule = r'(?P<strong>(?P<strong_open>(?<=\s)\*\*|^\*\*)|(?P<strong_close>\*\*(?=[\s.,;!?\)\]\}]|$)))'
    strongemph_rule = r'(?P<strongemph>(?P<strongemph_open>(?<=\s)\*\*\*|^\*\*\*)|(?P<strongemph_close>\*\*\*(?=[\n\s.,;!?\)\]\}]|$)))'
    # For the block elements
    #
    # These are joined into block_re in __init__ and compiled with re.M as
    # well, so ^ and $ anchor at line boundaries. Blocks that have a closing
    # marker (blockquote, pre, sect) end on a line repeating their opening
    # marker (the (?P=..._head) back-reference).
    rule_rule = r'(?P<rule>^----+$)'
    line_rule = r'(?P<line>^\s*$)'
    head_rule = r'(?P<head>^(?P<head_head>\*+)\s+(?P<head_text>[^*].*?)\s+(?P=head_head)\s*$)'
    text_rule = r'(?P<text>.+)'
    list_rule = r'(?P<list>^(?P<list_head>\s+[\*\+-])\s+(?P<list_text>.*?)$)'
    blockquote_rule = r'(?P<blockquote>^(?P<blockquote_head>\"\"+)\s*$(?P<blockquote_text>(.|\n)+?)^(?P=blockquote_head)\s*$)'
    pre_rule = r'(?P<pre>^(?P<pre_head>``+)\s*$(\n)?(?P<pre_text>(^[\#]!(?P<pre_kind>.*?)(\s+.*)?$)?(.|\n)+?)(\n)?^(?P=pre_head)\s*$)'
    sect_rule = r'''(?P<sect>^(?P<sect_head>\*\*+)\s*$
    (\n^[\#]class\s*(?P<sect_kind>.*)\s*$)?
    (\n^[\#]style\s*(?P<sect_style>.*)\s*$)?
    (?P<sect_text>(.|\n)+?)^(?P=sect_head)\s*$)'''
    # For the link targets:
    #
    # Joined into addr_re in __init__ in the order macro, extern, attach,
    # inter, page; page_rule matches anything and so acts as the fallback.
    extern_rule = r'(?P<extern_addr>(?P<extern_proto>http|https|ftp|nntp|news|mailto|telnet|file|irc):.*)'
    attach_rule = r'(?P<attach_scheme>attachment|inline|drawing|image|figure):(?P<attach_addr>.*)'
    inter_rule = r'(?P<inter_wiki>[A-Z][a-zA-Z]+):(?P<inter_page>.*)'
    # Any word followed by a parenthesised argument list is taken as a macro
    # call. '\w+' is a plain (non-raw) literal, but Python keeps the unknown
    # escape \w as written, so the pattern equals r'\w+'.
    #u'|'.join(wikimacro.getNames(config))
    macro_rule = r'(?P<macro_name>%s)\((-|(?P<macro_param>.*))\)' % '\w+'
    page_rule = r'(?P<page_name>.*)'

    def __init__(self, raw, request):
        """
        Start an empty document tree for raw and compile the block,
        link-address and inline regular expressions used while parsing.
        """
        self.request = request
        self.raw = raw
        self.root = DocNode('document', None)
        self.cur = self.root        # The most recent document node
        self.text = None            # The node to add inline characters to

        # Abbreviations are taken from request.dicts when it has the page
        dict_page = 'AbbreviationDict'
        if not self.request.dicts.has_dict(dict_page):
            self.abbr_dict = {}
        else:
            self.abbr_dict = self.request.dicts.dict(dict_page)

        # The 'XXX' is there because an empty rule would always match
        abbr_names = self.abbr_dict.keys()+['XXX']
        abbr_choices = '|'.join([re.escape(name.strip()) for name in abbr_names])
        self.abbr_rule = r'''
            (^|<|(?<=[\s()'`"\[\]&-]))
            (?P<abbr>%s)
            (>|$|(?=[\s,.!?()'`":;\[\]&-]))
        ''' % abbr_choices

        # Block level: alternatives are tried in this order
        block_parts = [
            self.line_rule,
            self.sect_rule,
            self.head_rule,
            self.rule_rule,
            self.pre_rule,
            self.blockquote_rule,
            self.list_rule,
            self.text_rule,
        ]
        self.block_rules = '|'.join(block_parts)
        self.block_re = re.compile(
            self.block_rules, re.VERBOSE | re.UNICODE | re.MULTILINE)

        # Link targets: page_rule comes last because it matches anything
        addr_parts = [
            self.macro_rule,
            self.extern_rule,
            self.attach_rule,
            self.inter_rule,
            self.page_rule,
        ]
        self.addr_rules = r'|'.join(addr_parts)
        self.addr_re = re.compile(self.addr_rules, re.VERBOSE | re.UNICODE)

        # Inline level: the abbreviation rule sits between the markup rules
        # and the smiley/typo/char rules
        inline_parts = [
            self._get_inline_rule(name)
            for name in ('link', 'code', 'strongemph', 'strong', 'emph', 'quote')
        ]
        inline_parts.append(self.abbr_rule)
        for name in ('smiley', 'typo', 'char'):
            inline_parts.append(self._get_inline_rule(name))
        self.inline_rules = r'|'.join(inline_parts)
        self.inline_re = re.compile(self.inline_rules, re.VERBOSE | re.UNICODE)

    def _get_inline_rule(self, rule):
        """
        Return the inline pattern for rule wrapped in a group named rule.

        The pattern body is read from inline_tab; a name that is not in the
        table gives an empty group body.
        """
        body = self.inline_tab.get(rule, '')
        return r'(?P<%s>%s)' % (rule, body)

    def _get_block_rule(self, rule):
        """
        Return the block pattern for rule wrapped in a group named rule.

        NOTE(review): block_tab is not defined anywhere in the visible
        source, so as written this lookup raises AttributeError unless the
        attribute is supplied elsewhere -- confirm before relying on it.
        """
        body = self.block_tab.get(rule, '')
        return r'(?P<%s>%s)' % (rule, body)

    def _upto(self, node, kinds):
        """
        Climb from node towards the root and return the first node
        (node itself included) whose kind is listed in kinds.
        A node without a parent is returned when no such node is found.
        """
        current = node
        while True:
            if current.parent is None or current.kind in kinds:
                return current
            current = current.parent

    # The _*_repl methods called for matches in regexps

    def _link_repl(self, groups):
        """
        Turn a matched link into a link, attachment or macro node.

        The target is classified with addr_re. Explicit link text labels
        the node when present; otherwise the target (the page part for
        interwiki links) is used. Macros get no text child.
        """
        target = groups.get('link_target', '')
        label = (groups.get('link_text', '') or '').strip()
        found = self.addr_re.match(target)
        if not found:
            # Nothing to add; only end the running text node
            self.text = None
            return
        part = found.group
        if part('page_name'):
            node = DocNode('page_link', self.cur)
            node.content = part('page_name')
            DocNode('text', node, label or node.content)
        elif part('extern_addr'):
            node = DocNode('external_link', self.cur)
            node.content = part('extern_addr')
            node.proto = part('extern_proto')
            DocNode('text', node, label or node.content)
        elif part('inter_wiki'):
            node = DocNode('interwiki_link', self.cur)
            node.content = '%s:%s'%(part('inter_wiki'), part('inter_page'))
            DocNode('text', node, label or part('inter_page'))
        elif part('attach_scheme'):
            scheme = part('attach_scheme')
            # 'inline' attachments use their own node kind
            if scheme == 'inline':
                scheme = 'inlined_attachment'
            node = DocNode(scheme, self.cur, part('attach_addr'))
            DocNode('text', node, label or node.content)
        elif part('macro_name'):
            node = DocNode('macro', self.cur, part('macro_name'))
            node.args = part('macro_param')
        else:
            node = DocNode('bad_link', self.cur)
            node.content = target
            DocNode('text', node, label or target)
        self.text = None
    _link_target_repl = _link_repl
    _link_text_repl = _link_repl

    def _rule_repl(self, groups):
        """Add a rule node to the enclosing document, section or blockquote."""
        container = self._upto(self.cur, ('document','section','blockquote'))
        self.cur = container
        DocNode('rule', container)

    def _list_repl(self, groups):
        """
        Add a list item, reusing an open list with the same bullet
        or opening a new (possibly nested) list.
        """
        bullet = groups.get('list_head', '')
        containers = ('document','section','blockquote')
        # Search upwards for a list that uses this bullet; give up at the
        # first document, section or blockquote
        candidate = self.cur
        while candidate:
            if candidate.kind == 'bullet_list' and candidate.bullet == bullet:
                break
            if candidate.kind in containers:
                break
            candidate = candidate.parent
        if candidate and candidate.kind == 'bullet_list':
            self.cur = candidate
        else:
            # No such list is open: start one below the nearest list item
            # or container
            anchor = self._upto(self.cur, ('list_item', 'document', 'section', 'blockquote'))
            new_list = DocNode('bullet_list', anchor)
            new_list.bullet = bullet
            self.cur = new_list
        self.cur = DocNode('list_item', self.cur)
        self.parse_inline(groups.get('list_text', ''))
        self.text = None
    _list_text_repl=_list_repl
    _list_head_repl=_list_repl

    def _head_repl(self, groups):
        """
        Add a header node to the enclosing document, section or blockquote.
        Its level is the length of the matched heading marker.
        """
        self.cur = self._upto(self.cur, ('document','section', 'blockquote'))
        title = groups.get('head_text', '').strip()
        header = DocNode('header', self.cur, title)
        marker = groups.get('head_head', ' ')
        header.level = len(marker)
    _head_head_repl = _head_repl
    _head_text_repl = _head_repl
   
    def _text_repl(self, groups):
        """
        Parse a line of running text as inline markup, opening a
        paragraph first when the current node is a block container.
        """
        here = self.cur
        if here.kind in ('document','section','blockquote'):
            self.cur = DocNode('paragraph', here)
        line = groups.get('text', '')
        # The trailing space separates this line from the next one
        self.parse_inline(line+' ')
        self.text = None

    def _sect_repl(self, groups):
        """
        Add a section node below the current node, parse the section body
        as blocks inside it and then return to the previous current node.
        """
        outer = self.cur
        section = DocNode('section', outer)
        section.sect = groups.get('sect_kind', None) or ''
        section.style = groups.get('sect_style', None) or ''
        # Descend into the section for the nested parse
        self.cur = section
        self.text = None
        self.parse_block(groups.get('sect_text', u''))
        # Back to where we were before the section
        self.cur = outer
        self.text = None
    _sect_text_repl = _sect_repl
    _sect_head_repl = _sect_repl
    _sect_kind_repl = _sect_repl
    _sect_style_repl = _sect_repl

    def _pre_repl(self, groups):
        """
        Add a preformatted node holding the matched text to the enclosing
        document, section or blockquote; its sect attribute carries the
        matched kind, or '' when there is none.
        """
        container = self._upto(self.cur, ('document','section','blockquote'))
        self.cur = container
        pre_kind = groups.get('pre_kind', None)
        pre = DocNode('preformatted', container, groups.get('pre_text', u''))
        pre.sect = pre_kind or ''
        self.text = None
    _pre_text_repl = _pre_repl
    _pre_head_repl = _pre_repl
    _pre_kind_repl = _pre_repl
    
    def _blockquote_repl(self, groups):
        """
        Add a blockquote node and parse the quoted text as blocks inside it.

        The blockquote is attached to the enclosing document, section or
        blockquote (not to an open paragraph or list item), the same way
        _pre_repl, _rule_repl and _head_repl place their nodes. Afterwards
        that container becomes the current node again.
        """
        container = self._upto(self.cur, ('document','section', 'blockquote'))
        # Attach to the container itself; attaching to self.cur would nest
        # the blockquote inside whatever paragraph or list item is open
        self.cur = DocNode('blockquote', container)
        self.text = None
        self.parse_block(groups.get('blockquote_text', u''))
        self.cur = container
        self.text = None
    _blockquote_text_repl = _blockquote_repl
    _blockquote_head_repl = _blockquote_repl

    def _line_repl(self, groups):
        """
        Handle an empty line: step back up to the enclosing document,
        section or blockquote.
        """
        containers = ('document','section','blockquote')
        self.cur = self._upto(self.cur, containers)

    def _code_repl(self, groups):
        """Add a code node with the stripped code text; end the text node."""
        snippet = groups.get('code_text', u'')
        DocNode('code', self.cur, snippet.strip())
        self.text = None
    _code_text_repl = _code_repl
    _code_head_repl = _code_repl

    def _emph_repl(self, groups):
        """
        Open an emphasis node on an opening marker outside emphasis;
        otherwise leave the emphasis node that is current, if any.
        """
        opening = groups.get('emph_open', None) is not None
        inside = self.cur.kind == 'emphasis'
        if opening and not inside:
            self.cur = DocNode('emphasis', self.cur)
            self.text = None
        elif inside:
            self.cur = self._upto(self.cur, ('emphasis',)).parent
            self.text = None
    _emph_open_repl = _emph_repl
    _emph_close_repl = _emph_repl
  
    def _quote_repl(self, groups):
        """
        Open a quote node on an opening marker. On a closing marker leave
        the enclosing quote, or keep the marker as plain text when there
        is no quote to close.
        """
        if groups.get('quote_open', None) is not None:
            self.cur = DocNode('quote', self.cur)
            self.text = None
            return
        # Closing marker: look for the quote it belongs to
        self.cur = self._upto(self.cur, ('quote', 'section', 'paragraph', 'list_item'))
        if self.cur.kind == 'quote':
            self.cur = self.cur.parent or self.root
            self.text = None
            return
        # No quote is open, so the marker is ordinary text
        if self.text is None:
            self.text = DocNode('text', self.cur, u'')
        self.text.content += groups.get('quote', '')
    _quote_open_repl = _quote_repl
    _quote_close_repl = _quote_repl

  
    def _strong_repl(self, groups):
        """
        Open a strong node on an opening marker outside strong/emphasis;
        otherwise leave the strong node that is current, if any.
        """
        opening = groups.get('strong_open', None) is not None
        kind = self.cur.kind
        if opening and kind not in ('strong', 'emphasis'):
            self.cur = DocNode('strong', self.cur)
            self.text = None
        elif kind == 'strong':
            self.cur = self._upto(self.cur, ('strong',)).parent
            self.text = None
    _strong_open_repl = _strong_repl
    _strong_close_repl = _strong_repl
  
    def _strongemph_repl(self, groups):
        """
        Open an emphasis node nested in a strong node on an opening marker
        outside strong/emphasis; otherwise leave the current emphasis
        and/or strong node, innermost first.
        """
        opening = groups.get('strongemph_open', None) is not None
        if opening and self.cur.kind not in ('strong', 'emphasis'):
            outer = DocNode('strong', self.cur)
            self.cur = DocNode('emphasis', outer)
        else:
            for kind in ('emphasis', 'strong'):
                if self.cur.kind == kind:
                    self.cur = self._upto(self.cur, (kind,)).parent
        self.text = None
    _strongemph_open_repl = _strongemph_repl
    _strongemph_close_repl = _strongemph_repl

  
    def _smiley_repl(self, groups):
        """
        Add a smiley node for the matched smiley text.

        The running text node is dropped afterwards so that characters
        following the smiley start a new text node instead of being
        appended to the one created before the smiley; _code_repl and
        _abbr_repl end the text node in the same way.
        """
        word = groups.get('smiley', '')
        DocNode('smiley', self.cur, word)
        self.text = None
        
    def _typo_repl(self, groups):
        """
        Append the typo_tab replacement for the matched sequence to the
        running text node, creating that node when there is none.
        """
        matched = groups.get('typo', u'')
        if self.text is None:
            self.text = DocNode('text', self.cur, u'')
        # A sequence missing from the table is kept unchanged
        self.text.content += self.typo_tab.get(matched, matched)
        
    def _abbr_repl(self, groups):
        """
        Add an abbr node whose title is the abbr_dict entry for the
        matched abbreviation ('???' when there is none); end the text node.
        """
        short = groups.get('abbr', '')
        abbr_node = DocNode('abbr', self.cur, short)
        abbr_node.title = self.abbr_dict.get(short, '???')
        self.text = None
  
    def _char_repl(self, groups):
        """
        Append the matched character to the running text node,
        creating that node when there is none.
        """
        char = groups.get('char', u'')
        if self.text is None:
            self.text = DocNode('text', self.cur, u'')
        self.text.content += char
 
    def _replace(self, match):
        """
        Dispatch a match to a _*_repl method.

        The handler is looked up by the name of the first named group found
        with a value (in dict iteration order) and receives the whole group
        dict. Exactly one handler is called per match.
        """
        groups = match.groupdict()
        for name, value in groups.iteritems():
            if value is None:
                continue
            handler = getattr(self, '_%s_repl'%name)
            handler(groups)
            return

    def parse_inline(self, raw):
        """Run the inline rules over raw, dispatching every match."""
        # Only the side effects of _replace matter; the substituted string
        # is thrown away
        self.inline_re.sub(self._replace, raw)
    
    def parse_block(self, raw):
        """Run the block rules over raw, dispatching every match."""
        # Only the side effects of _replace matter; the substituted string
        # is thrown away
        self.block_re.sub(self._replace, raw)
    
    def parse(self):
        """Parse the raw text as blocks and return the document root node."""
        source = self.raw
        self.parse_block(source)
        return self.root
