"""
MoinMoin header
===============

Header represents the wiki page header, which includes comments, processing
instructions and pragmas. The header parses the text and saves all valid
headers found in it. Headers are accessed like a dict.

If you want to add custom handling of certain headers, subclass and
add set_xxx methods. The method will be called by the parser for each
found header with that name. To change the default handling of headers,
override setHeader.

Usage
-----
::
    header = Header(request, text)
    # This will trigger parsing of text
    language = header['language']

@copyright: 2005 Nir Soffer <nirs@freeshell.org>
@license: GNU GPL, see COPYING for details.
"""

import re
from MoinMoin import i18n


class Header(object):
    """ Immutable wiki page header

    The header is the run of lines at the top of the page text that
    start with '#': comments ('##'), processing instructions and
    pragmas. It ends at the first line that does not start with '#',
    or that is an empty processing instruction ('#' followed by
    whitespace or by the end of the text).

    Header parsing is done lazily, on the first accessor call, and the
    result is kept for later calls.
    """
    # -----------------------------------------------------------------
    # Creation

    def __init__(self, request, text):
        """ Save request and page text, nothing is parsed yet

        @param request: stored as self.request, not used by this class
        @param text: complete page text, header and body
        """
        self.request = request
        self.text = text
        # Both are calculated on first use, None means "not yet"
        self._headers = None
        self._length = None

    # -----------------------------------------------------------------
    # Accessing

    # Header is accessed like a dict - saves a lot of getter methods.

    def __getitem__(self, key):
        """ dict style obj[key] handler, raise KeyError if missing """
        return self.headers()[key]

    def get(self, key, default=None):
        """ dict style obj.get(key) handler """
        return self.headers().get(key, default)

    def __contains__(self, key):
        """ in obj handler """
        return key in self.headers()

    def length(self):
        """ Return length of page header - not number of headers

        Calculated by looking for the first character of the body, and
        cached after the first call.

        Used internally to do efficient parsing, and may be used by
        others to get only the body part of a page text, as
        text[header.length():].

        @rtype: int
        @return: offset of the first body character in the text
        """
        if self._length is None:
            match = self.bodyPattern().search(self.text)
            if match:
                self._length = match.start()
            else:
                # No body, all header
                self._length = len(self.text)
        return self._length

    # -----------------------------------------------------------------
    # Private

    # Methods here are private, but do not use the _convention, because
    # it is less readable.

    def headers(self):
        """ Return headers dict, trigger parsing on the first call """
        if self._headers is None:
            self.parse()
        return self._headers

    def parse(self):
        """ Start parsing, triggered automatically by headers()

        Each header line is split into a key and a value. If the
        instance has a set_<key> method, it is called with the value,
        otherwise the value is saved with setHeader.
        """
        self._headers = {}
        for line in self.text[:self.length()].splitlines():
            if line.startswith('##'):
                # Comment line, nothing to save
                continue
            # Drop the leading '#'
            key, value = self.splitTokens(line[1:])
            if not key:
                # Nothing after the '#' - there is no header to save
                continue
            setter = getattr(self, 'set_' + key, None)
            if setter:
                setter(value)
            else:
                self.setHeader(key, value)

    def set_acl(self, value):
        """ Save all acl lines in a list, in page order """
        try:
            self._headers['acl'].append(value)
        except KeyError:
            self._headers['acl'] = [value]

    def set_language(self, value):
        """ Save known languages, ignore other """
        if value in i18n.wikiLanguages():
            self.setHeader('language', value)

    def set_cite(self, value):
        """ Save source for blockquote parser """
        self.setHeader('cite', value)

    def set_pragma(self, text):
        """ Save pragmas, ignore invalid pragmas

        A pragma is valid when it has both a key and a value. All
        pragmas are saved in one dict under the 'pragma' header.
        """
        key, value = self.splitTokens(text)
        if value:
            try:
                self._headers['pragma'][key] = value
            except KeyError:
                self._headers['pragma'] = {key: value}

    def setHeader(self, key, value):
        """ Set header value, last value override """
        self._headers[key] = value

    def splitTokens(self, text):
        """ Split first two tokens in text by whitespace

        The key never contains whitespace: when the text has no value,
        surrounding whitespace is dropped from the key instead of being
        returned as part of it. Trailing whitespace is removed from the
        value.

        @param text: text to split
        @rtype: tuple
        @return: (key, value), each is '' when missing
        """
        tokens = text.split(None, 1)
        if len(tokens) == 2:
            return tokens[0], tokens[1].rstrip()
        if tokens:
            # Key only, e.g. 'key' or 'key   '
            return tokens[0], ''
        # Empty or whitespace only text
        return '', ''

    def bodyPattern(self):
        """ Return shared compiled regular expression

        The pattern matches the start of the first body line. The
        compiled pattern is saved as a class attribute on first use.
        """
        myClass = self.__class__
        if not getattr(myClass, '_bodyPattern', False):
            # Does not start with #, or empty (invalid) pi: a # followed
            # by whitespace or by the end of the text. \Z is needed for
            # a lone # on the last line, where no character follows.
            myClass._bodyPattern = re.compile(r"^[^\#]|^\#(?:\s|\Z)",
                                              re.MULTILINE | re.UNICODE)
        return myClass._bodyPattern

