#!/usr/bin/env python
""" Find texts in python source tree using Python compiler package
    
    Usage: findtext.py path
    
    Find all calls to gettext function in the source tree and collect the 
    texts in a dict. Use compiler to create an abstract syntax tree from 
    each source file, then find the nodes for gettext function call, and 
    get the text from the call.
         
    Localized texts are usually translated during runtime by gettext
    functions and appear in the source as _('text...'). TextFinder class
    finds calls to the '_' function in any namespace, or your preferred
    gettext function.
    
    Note that TextFinder will only retrieve text from function calls with
    a constant argument like _("text"). Calls like _("text" % locals()),
    _("text 1" + "text 2") are marked as bad calls in the report, and the
    text is not retrieved into the dictionary.
    
    Note also that texts in source can appear several times in the same file
    or different files, but they will only appear once in the dictionary that
    this tool creates.
    
    What is missing from this tool is the rather simple machinery to create
    a language file from the dictionary. This machinery exists already in
    MoinMoin [http://moin.sf.net], which this tool was written for.
    
       
    findtext - Find texts in python source tree
    
    Copyright (C) 2003 Nir Soffer
    
    Based on code by Seo Sanghyeon and the python compiler package.
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details:
    http://www.gnu.org/licenses/gpl.html
"""

import sys
import os

import compiler
from compiler.ast import Name, Const, CallFunc, Getattr


class TextFinder:
    """ Walk through an AST tree and collect text from gettext calls

        An instance is meant to be used as the visitor given to
        compiler.walk(); visitModule() is the entry point and does the
        actual traversal itself through walk().

        The dictionary and the found/bad counters accumulate over every
        module visited with the same instance. Only the visited-node cache
        is reset for each module.
    """

    def __init__(self, name='_'):
        """ Init with the gettext function name or '_'

            name is compared with the called name of every function call:
            both plain calls name('text') and attribute calls
            obj.name('text') match.
        """
        self._name = name       # gettext function name
        self._dictionary = {}   # Unique texts in the found texts
        self._found = 0         # All good calls including duplicates
        self._bad = 0           # Bad calls: _('%s' % var) or _('a' + 'b')
        # Created here as well as in visitModule(), so that walk() can be
        # called directly on a fresh instance without an AttributeError.
        self._visited = {}

    def visitModule(self, node):
        """ Start the visit from the top level

            Reset the node cache, then walk the tree below node. The node
            cache is used to prevent us from visiting the same node twice.
        """
        self._visited = {}
        self.walk(node)

    def parseNode(self, node):
        """ Parse a function call node and collect its text

            Return true when node is a call to the gettext function with at
            least one positional argument, false for anything else. For a
            matching call, a constant first argument is added with
            addText(); any other first argument is counted as a bad call.
        """
        if not (isinstance(node, CallFunc) and node.args):
            return False

        called = node.node
        is_gettext = (
            # Standard call _('text')
            (isinstance(called, Name) and called.name == self._name) or
            # A call to an object attribute: object._('text')
            (isinstance(called, Getattr) and called.attrname == self._name))
        if not is_gettext:
            return False

        first = node.args[0]
        if isinstance(first, Const):
            # Good call with a constant _('text')
            self.addText(first.value)
        else:
            # Bad call like _('a' + 'b')
            self._bad += 1
        return True

    def walk(self, node):
        """ Walk through node and all nodes below it

            Nodes already in the cache are skipped. The children of a node
            are walked only when parseNode() did not handle it, so calls
            nested inside the arguments of a gettext call are not collected.
        """
        if node in self._visited:
            # We visited this node already
            return

        self._visited[node] = True
        if not self.parseNode(node):
            for child in node.getChildNodes():
                self.walk(child)

    def addText(self, text):
        """ Add text to dictionary and count found texts

            Note that the number of texts in the dictionary could be
            different from the number of texts found, because some texts
            appear several times in the code.
        """
        self._found += 1
        self._dictionary[text] = text

    def dictionary(self):
        """ Return the dictionary of unique texts, mapping text to itself """
        return self._dictionary

    def bad(self):
        """ Return the number of bad calls counted so far """
        return self._bad

    def found(self):
        """ Return the number of good calls counted so far, with duplicates """
        return self._found
        

def visit(path, visitor):
    """ Parse one source file and run visitor over its syntax tree

        path is given to compiler.parseFile() and the tree it returns is
        given to compiler.walk() together with visitor. Returns nothing.
    """
    compiler.walk(compiler.parseFile(path), visitor)


if __name__ == '__main__':
    # The tree to scan is the single required command line argument.
    if len(sys.argv) < 2:
        print('Usage %s path' % __file__)
        sys.exit(1)

    finder = TextFinder()
    top = sys.argv[1]

    # Totals as of the previous file. The finder accumulates over all files,
    # so the per-file numbers are the finder's counters minus these.
    unique = found = bad = 0

    print('')
    print('Find texts in %(top)s:' % {'top': top})
    print('')

    for root, _dirs, files in os.walk(top):
        for name in files:
            if not name.endswith('.py'):
                continue

            path = os.path.join(root, name)
            visit(path, finder)

            total_unique = len(finder.dictionary())
            total_found = finder.found()
            total_bad = finder.bad()
            report = {
                'path': path,
                'new_unique': total_unique - unique,
                'new_found': total_found - found,
                'new_bad': total_bad - bad,
            }

            # Report each file results
            print('%(path)s: %(new_unique)d (of %(new_found)d)' % report)

            # Warn about bad calls - these should be fixed!
            if report['new_bad']:
                print('### Warning: %(new_bad)d bad call(s)' % report)
                print('')

            unique, found, bad = total_unique, total_found, total_bad

    summary = {'unique': unique, 'found': found, 'bad': bad}
    print('')
    print(('%(unique)d unique texts in dictionary of '
           '%(found)d texts in source') % summary)
    if bad:
        print('### %(bad)d bad calls' % summary)
    
