epydoc.docparser

   1  # epydoc -- Source code parsing 
   2  # 
   3  # Copyright (C) 2005 Edward Loper 
   4  # Author: Edward Loper <edloper@loper.org> 
   5  # URL: <http://epydoc.sf.net> 
   6  # 
   7  # $Id: docparser.py 1168 2006-04-05 16:52:56Z edloper $ 
   8   
   9  """ 
  10  Extract API documentation about python objects by parsing their source 
  11  code. 
  12   
  13  L{DocParser} is a processing class that reads the Python source code 
  14  for one or more modules, and uses it to create L{APIDoc} objects 
  15  containing the API documentation for the variables and values defined 
  16  in those modules. 
  17   
  18  C{DocParser} can be subclassed to extend the set of source code 
  19  constructions that it supports. 
  20  """ 
  21  __docformat__ = 'epytext en' 
  22   
  23  ###################################################################### 
  24  ## Imports 
  25  ###################################################################### 
  26   
  27  # Python source code parsing: 
  28  import token, tokenize 
  29  # Finding modules: 
  30  import imp 
  31  # File services: 
  32  import os, os.path 
  33  # Unicode: 
  34  import codecs 
  35  # API documentation encoding: 
  36  from epydoc.apidoc import * 
  37  # For looking up the docs of builtins: 
  38  import __builtin__, exceptions 
  39  import epydoc.docintrospecter  
  40  # Misc utility functions: 
  41  from epydoc.util import * 
  42  # Backwards compatibility 
  43  from epydoc.compat import * 
  44   
  45  ###################################################################### 
  46  ## Doc Parser 
  47  ###################################################################### 
  48   
  49 -class ParseError(Exception): 
  50      """ 
  51      An exception that is used to signify that C{docparser} encountered 
  52      syntactically invalid Python code while processing a Python source 
  53      file. 
  54      """ 
  55   
  56  _moduledoc_cache = {} 
  57  """A cache of C{ModuleDoc}s that we've already created. 
  58  C{_moduledoc_cache} is a dictionary mapping from filenames to 
  59  C{ValueDoc} objects. 
  60  @type: C{dict}""" 
  61   
  62  # [xx] outdated: 
  63  """     
  64  An API documentation extractor based on source code parsing. 
  65  C{DocParser} reads and parses the Python source code for one or 
  66  more modules, and uses it to create L{APIDoc} objects containing 
  67  the API documentation for the variables and values defined in 
  68  those modules.  The main interface method is L{parse()}, which 
  69  returns the documentation for an object with a given dotted name, 
  70  or a module with a given filename. 
  71   
  72  Currently, C{DocParser} extracts documentation from the following 
  73  source code constructions: 
  74   
  75    - module docstring 
  76    - import statements 
  77    - class definition blocks 
  78    - function definition blocks 
  79    - assignment statements 
  80      - simple assignment statements 
  81      - assignment statements with multiple C{'='}s 
  82      - assignment statements with unpacked left-hand sides 
  83      - assignment statements that wrap a function in classmethod 
  84        or staticmethod. 
  85      - assignment to special variables __path__, __all__, and 
  86        __docformat__. 
  87    - delete statements 
  88   
  89  C{DocParser} does not yet support the following source code 
  90  constructions: 
  91   
  92    - assignment statements that create properties 
  93   
   94  By default, C{DocParser} will explore the contents of top-level 
  95  C{try} and C{if} blocks.  If desired, C{DocParser} can also 
  96  be told to explore the contents of C{while} and C{for} blocks. 
  97   
  98  Subclassing 
  99  =========== 
 100  C{DocParser} can be subclassed, to extend the set of source code 
 101  constructions that it supports.  C{DocParser} can be extended in 
 102  several different ways: 
 103   
 104    - [XX] fill this in! 
 105   
 106  """ 
 107   
 108  #//////////////////////////////////////////////////////////// 
 109  # Configuration Constants 
 110  #//////////////////////////////////////////////////////////// 
 111   
 112  #{ Configuration Constants: Control Flow  
 113  PARSE_TRY_BLOCKS = True 
 114  """Should the contents of C{try} blocks be examined?""" 
 115  PARSE_EXCEPT_BLOCKS = True 
 116  """Should the contents of C{except} blocks be examined?""" 
 117  PARSE_FINALLY_BLOCKS = True 
 118  """Should the contents of C{finally} blocks be examined?""" 
 119  PARSE_IF_BLOCKS = True 
 120  """Should the contents of C{if} blocks be examined?""" 
 121  PARSE_ELSE_BLOCKS = True 
 122  """Should the contents of C{else} and C{elif} blocks be examined?""" 
 123  PARSE_WHILE_BLOCKS = False 
 124  """Should the contents of C{while} blocks be examined?""" 
 125  PARSE_FOR_BLOCKS = False 
 126  """Should the contents of C{for} blocks be examined?""" 
 127   
 128  #{ Configuration Constants: Imports 
 129  IMPORT_HANDLING = 'link' 
 130  """What should C{docparser} do when it encounters an import 
 131  statement? 
 132    - C{'link'}: Create variabledoc objects with imported_from pointers 
 133      to the source object. 
 134    - C{'parse'}: Parse the imported file, to find the actual 
 135      documentation for the imported object.  (This will fall back 
 136      to the 'link' behavior if the imported file can't be parsed, 
 137      e.g., if it's a builtin.) 
 138  """ 
 139   
 140  IMPORT_STAR_HANDLING = 'parse' 
 141  """When C{docparser} encounters a C{'from M{m} import *'} 
 142  statement, and is unable to parse C{M{m}} (either because 
 143  L{IMPORT_HANDLING}=C{'link'}, or because parsing failed), how 
 144  should it determine the list of identifiers expored by C{M{m}}? 
 145    - C{'ignore'}: ignore the import statement, and don't create 
 146      any new variables. 
 147    - C{'parse'}: parse it to find a list of the identifiers that it 
 148      exports.  (This will fall back to the 'ignore' behavior if the 
 149      imported file can't be parsed, e.g., if it's a builtin.) 
 150    - C{'introspect'}: import the module and introspect it (using C{dir}) 
 151      to find a list of the identifiers that it exports.  (This will 
 152      fall back to the 'ignore' behavior if the imported file can't 
 153      be parsed, e.g., if it's a builtin.) 
 154  """ 
 155   
 156  DEFAULT_DECORATOR_BEHAVIOR = 'opaque' 
 157  """When C{DocParse} encounters an unknown decorator, what should 
 158  it do to the documentation of the decorated function? 
 159    - C{'transparent'}: leave the function's documentation as-is. 
 160    - C{'opaque'}: replace the function's documentation with an 
 161      empty C{ValueDoc} object, reflecting the fact that we have no 
 162      knowledge about what value the decorator returns. 
 163  """ 
 164   
 165  BASE_HANDLING = 'link' 
 166  """What should C{docparser} do when it encounters a base class that 
 167  was imported from another module? 
 168    - C{'link'}: Create a valuedoc with a C{proxy_for} pointer to the 
 169      base class. 
 170    - C{'parse'}: Parse the file containing the base class, to find 
 171      the actual documentation for it.  (This will fall back to the 
 172      'link' behavior if the imported file can't be parsed, e.g., if 
 173      it's a builtin.) 
 174  """ 
 175   
 176  #{ Configuration Constants: Comment docstrings 
 177  COMMENT_DOCSTRING_MARKER = '#: ' 
 178  """The prefix used to mark comments that contain attribute 
 179  docstrings for variables.""" 
 180   
 181  #{ Configuration Constants: Grouping 
 182  START_GROUP_MARKER = '#{' 
 183  """The prefix used to mark a comment that starts a group.  This marker 
 184  should be followed (on the same line) by the name of the group. 
 185  Following a start-group comment, all variables defined at the same 
 186  indentation level will be assigned to this group name, until the 
 187  parser reaches the end of the file, a matching end-group comment, or 
 188  another start-group comment at the same indentation level. 
 189  """ 
 190   
 191  END_GROUP_MARKER = '#}' 
 192  """The prefix used to mark a comment that ends a group.  See 
 193  L{START_GROUP_MARKER}.""" 
 194   
 195  #///////////////////////////////////////////////////////////////// 
 196  #{ Module parser 
 197  #///////////////////////////////////////////////////////////////// 
 198   
 199 -def parse_docs(filename=None, name=None, context=None, is_script=False): 
 200      """ 
 201      Generate the API documentation for a specified object by 
 202      parsing Python source files, and return it as a L{ValueDoc}. 
 203      The object to generate documentation for may be specified 
 204      using the C{filename} parameter I{or} the C{name} parameter. 
 205      (It is an error to specify both a filename and a name; or to 
 206      specify neither a filename nor a name). 
 207   
 208      @param filename: The name of the file that contains the python 
 209          source code for a package, module, or script.  If 
 210          C{filename} is specified, then C{parse} will return a 
 211          C{ModuleDoc} describing its contents. 
 212      @param name: The fully-qualified python dotted name of any 
 213          value (including packages, modules, classes, and 
 214          functions).  C{DocParser} will automatically figure out 
 215          which module(s) it needs to parse in order to find the 
 216          documentation for the specified object. 
 217      @param context: The API documentation for the package that 
 218          contains C{filename}.  If no context is given, then 
 219          C{filename} is assumed to contain a top-level module or 
 220          package.  It is an error to specify a C{context} if the 
 221          C{name} argument is used. 
 222      @rtype: L{ValueDoc} 
 223      """ 
 224      # Always introspect __builtins__ & exceptions (e.g., in case 
 225      # they're used as base classes.) 
 226      epydoc.docintrospecter.introspect_docs(__builtin__) 
 227      epydoc.docintrospecter.introspect_docs(exceptions) 
 228       
 229      # If our input is a python object name, then delegate to 
 230      # _find(). 
 231      if filename is None and name is not None: 
 232          if context: 
 233              raise ValueError("context should only be specified together " 
 234                               "with filename, not with name.") 
 235          name = DottedName(name) 
 236          val_doc = _find(name) 
 237          if val_doc.canonical_name == UNKNOWN: 
 238              val_doc.canonical_name = name 
 239          return val_doc 
 240   
 241      # If our input is a filename, then create a ModuleDoc for it, 
 242      # and use process_file() to populate its attributes. 
 243      elif filename is not None and name is None: 
 244          # Use a python source version, if possible. 
 245          if not is_script: 
 246              try: filename = py_src_filename(filename) 
 247              except ValueError, e: raise ImportError('%s' % e) 
 248   
 249          # Check the cache, first. 
 250          if _moduledoc_cache.has_key(filename): 
 251              return _moduledoc_cache[filename] 
 252           
 253          log.info("Parsing %s" % filename) 
 254   
 255          # If the context wasn't provided, then check if the file is in 
 256          # a package directory.  If so, then update basedir & name to 
 257          # contain the topmost package's directory and the fully 
 258          # qualified name for this file.  (This update assume the 
 259          # default value of __path__ for the parent packages; if the 
 260          # parent packages override their __path__s, then this can 
 261          # cause us not to find the value.) 
 262          if context is None and not is_script: 
 263              basedir = os.path.split(filename)[0] 
 264              name = os.path.splitext(os.path.split(filename)[1])[0] 
 265              if name == '__init__': 
 266                  basedir, name = os.path.split(basedir) 
 267              context = _parse_package(basedir) 
 268   
 269          # Figure out the canonical name of the module we're parsing. 
 270          if not is_script: 
 271              module_name, is_pkg = _get_module_name(filename, context) 
 272          else: 
 273              module_name = DottedName(munge_script_name(filename)) 
 274              is_pkg = False 
 275   
 276          # Create a new ModuleDoc for the module, & add it to the cache. 
 277          module_doc = ModuleDoc(canonical_name=module_name, variables={}, 
 278                                 sort_spec=[], imports=[], 
 279                                 filename=filename, package=context, 
 280                                 is_package=is_pkg, submodules=[], 
 281                                 docs_extracted_by='parser') 
 282          module_doc.defining_module = module_doc 
 283          _moduledoc_cache[filename] = module_doc 
 284   
 285          # Set the module's __path__ to its default value. 
 286          if is_pkg: 
 287              module_doc.path = [os.path.split(module_doc.filename)[0]] 
 288           
 289          # Add this module to the parent package's list of submodules. 
 290          if context is not None: 
 291              context.submodules.append(module_doc) 
 292   
 293          # Tokenize & process the contents of the module's source file. 
 294          try: 
 295              process_file(module_doc) 
 296          except tokenize.TokenError, e: 
 297              msg, (srow, scol) = e.args 
 298              raise ParseError('Error during parsing: %s ' 
 299                               '(%s, line %d, char %d)' % 
 300                               (msg, module_doc.filename, srow, scol)) 
 301   
 302          # Handle any special variables (__path__, __docformat__, etc.) 
 303          handle_special_module_vars(module_doc) 
 304   
 305          # Return the completed ModuleDoc 
 306          return module_doc 
 307      else: 
 308          raise ValueError("Expected exactly one of the following " 
 309                           "arguments: name, filename") 
 310   
 311 -def _parse_package(package_dir): 
 312      """ 
 313      If the given directory is a package directory, then parse its 
 314      __init__.py file (and the __init__.py files of all ancestor 
 315      packages); and return its C{ModuleDoc}. 
 316      """ 
 317      if not is_package_dir(package_dir): 
 318          return None 
 319      parent_dir = os.path.split(package_dir)[0] 
 320      parent_doc = _parse_package(parent_dir) 
 321      package_file = os.path.join(package_dir, '__init__') 
 322      return parse_docs(filename=package_file, context=parent_doc) 
 323           
 324  # Special vars: 
 325  # C{__docformat__}, C{__all__}, and C{__path__}. 
 326 -def handle_special_module_vars(module_doc): 
 327      # If __docformat__ is defined, parse its value. 
 328      toktree = _module_var_toktree(module_doc, '__docformat__') 
 329      if toktree is not None: 
 330          try: module_doc.docformat = parse_string(toktree) 
 331          except: pass 
 332          del module_doc.variables['__docformat__'] 
 333               
 334      # If __all__ is defined, parse its value. 
 335      toktree = _module_var_toktree(module_doc, '__all__') 
 336      if toktree is not None: 
 337          try: 
 338              public_names = set(parse_string_list(toktree)) 
 339              for name, var_doc in module_doc.variables.items(): 
 340                  if name in public_names: 
 341                      var_doc.is_public = True 
 342                      if not isinstance(var_doc, ModuleDoc): 
 343                          var_doc.is_imported = False 
 344                  else: 
 345                      var_doc.is_public = False 
 346          except ParseError: 
 347              # If we couldn't parse the list, give precedence to introspection. 
 348              for name, var_doc in module_doc.variables.items(): 
 349                  if not isinstance(var_doc, ModuleDoc): 
 350                      var_doc.is_imported = UNKNOWN 
 351          del module_doc.variables['__all__'] 
 352   
 353      # If __path__ is defined, then extract its value (pkgs only) 
 354      if module_doc.is_package: 
 355          toktree = _module_var_toktree(module_doc, '__path__') 
 356          if toktree is not None: 
 357              try: 
 358                  module_doc.path = parse_string_list(toktree) 
 359              except ParseError: 
 360                  pass # [xx] 
 361              del module_doc.variables['__path__'] 
 362   
 363 -def _module_var_toktree(module_doc, name): 
 364      var_doc = module_doc.variables.get(name) 
 365      if (var_doc is None or var_doc.value in (None, UNKNOWN) or 
 366          var_doc.value.toktree is UNKNOWN): 
 367          return None 
 368      else: 
 369          return var_doc.value.toktree 
 370   
 371  #//////////////////////////////////////////////////////////// 
 372  #{ Module Lookup 
 373  #//////////////////////////////////////////////////////////// 
 374   
 375 -def _find(name, package_doc=None): 
 376      """ 
 377      Return the API documentaiton for the object whose name is 
 378      C{name}.  C{package_doc}, if specified, is the API 
 379      documentation for the package containing the named object. 
 380      """ 
 381      # If we're inside a package, then find the package's path. 
 382      if package_doc is None: 
 383          path = None 
 384      else: 
 385          try: 
 386              # [XXX] 
 387              path_ast = module_doc.variables['__path__'].value.ast 
 388              path = extract_string_list(path_ast) 
 389          except: 
 390              path = [os.path.split(package_doc.filename)[0]] 
 391   
 392      # The leftmost identifier in `name` should be a module or 
 393      # package on the given path; find it and parse it. 
 394      filename = _get_filename(name[0], path) 
 395      module_doc = parse_docs(filename, context=package_doc) 
 396   
 397      # If the name just has one identifier, then the module we just 
 398      # parsed is the object we're looking for; return it. 
 399      if len(name) == 1: return module_doc 
 400   
 401      # Otherwise, we're looking for something inside the module. 
 402      # First, check to see if it's in a variable (but ignore 
 403      # variables that just contain imported submodules). 
 404      if not _is_submodule_import_var(module_doc, name[1]): 
 405          try: return _find_in_namespace(name[1:], module_doc) 
 406          except ImportError: pass 
 407   
 408      # If not, then check to see if it's in a subpackage. 
 409      if module_doc.is_package: 
 410          return _find(name[1:], module_doc) 
 411   
 412      # If it's not in a variable or a subpackage, then we can't 
 413      # find it. 
 414      raise ImportError('Could not find value') 
 415   
 416 -def _is_submodule_import_var(module_doc, var_name): 
 417      """ 
 418      Return true if C{var_name} is the name of a variable in 
 419      C{module_doc} that just contains an C{imported_from} link to a 
 420      submodule of the same name.  (I.e., is a variable created when 
 421      a package imports one of its own submodules.) 
 422      """ 
 423      var_doc = module_doc.variables.get(var_name) 
 424      full_var_name = DottedName(module_doc.canonical_name, var_name) 
 425      return (var_doc is not None and 
 426              var_doc.imported_from == full_var_name) 
 427       
 428 -def _find_in_namespace(name, namespace_doc): 
 429      if name[0] not in namespace_doc.variables: 
 430          raise ImportError('Could not find value') 
 431       
 432      # Look up the variable in the namespace. 
 433      var_doc = namespace_doc.variables[name[0]] 
 434      if var_doc.value is UNKNOWN: 
 435          raise ImportError('Could not find value') 
 436      val_doc = var_doc.value 
 437   
 438      # If the variable's value was imported, then follow its 
 439      # alias link. 
 440      if var_doc.imported_from not in (None, UNKNOWN): 
 441          return _find(var_doc.imported_from+name[1:]) 
 442   
 443      # Otherwise, if the name has one identifier, then this is the 
 444      # value we're looking for; return it. 
 445      elif len(name) == 1: 
 446          return val_doc 
 447   
 448      # Otherwise, if this value is a namespace, look inside it. 
 449      elif isinstance(val_doc, NamespaceDoc): 
 450          return _find_in_namespace(name[1:], val_doc) 
 451   
 452      # Otherwise, we ran into a dead end. 
 453      else: 
 454          raise ImportError('Could not find value') 
 455       
 456 -def _get_filename(identifier, path=None): 
 457      if path == UNKNOWN: path = None 
 458      try: 
 459          fp, filename, (s,m,typ) = imp.find_module(identifier, path) 
 460          if fp is not None: fp.close() 
 461      except ImportError: 
 462          raise ImportError, 'No Python source file found.' 
 463   
 464      if typ == imp.PY_SOURCE: 
 465          return filename 
 466      elif typ == imp.PY_COMPILED: 
 467          # See if we can find a corresponding non-compiled version. 
 468          filename = re.sub('.py\w$', '.py', filename) 
 469          if not os.path.exists(filename): 
 470              raise ImportError, 'No Python source file found.' 
 471          return filename 
 472      elif typ == imp.PKG_DIRECTORY: 
 473          filename = os.path.join(filename, '__init__.py') 
 474          if not os.path.exists(filename): 
 475              filename = os.path.join(filename, '__init__.pyw') 
 476              if not os.path.exists(filename): 
 477                  raise ImportError, 'No package file found.' 
 478          return filename 
 479      elif typ == imp.C_BUILTIN: 
 480          raise ImportError, 'No Python source file for builtin modules.' 
 481      elif typ == imp.C_EXTENSION: 
 482          raise ImportError, 'No Python source file for c extensions.' 
 483      else: 
 484          raise ImportError, 'No Python source file found.' 
 485   
 486  #///////////////////////////////////////////////////////////////// 
 487  #{ File tokenization loop 
 488  #///////////////////////////////////////////////////////////////// 
 489   
 490 -def process_file(module_doc): 
 491      """ 
 492      Read the given C{ModuleDoc}'s file, and add variables 
 493      corresponding to any objects defined in that file.  In 
 494      particular, read and tokenize C{module_doc.filename}, and 
 495      process each logical line using L{process_line()}. 
 496      """ 
 497      # Keep track of the current line number: 
 498      lineno = None 
 499       
 500      # Use this list to collect the tokens on a single logical line: 
 501      line_toks = [] 
 502       
 503      # This list contains one APIDoc for each indentation level. 
 504      # The first element is the APIDoc for the module, and each 
 505      # subsequent element is the APIDoc for the object at that 
 506      # indentation level.  The final element of the list is the 
 507      # C{APIDoc} for the entity that we're currently processing. 
 508      parent_docs = [module_doc] 
 509   
 510      # The APIDoc for the object that was defined by the previous 
 511      # line, if any; or None otherwise.  This is used to update 
 512      # parent_docs when we encounter an indent; and to decide what 
 513      # object (if any) is described by a docstring. 
 514      prev_line_doc = module_doc 
 515   
 516      # A list of comments that occur before or on the current 
 517      # logical line, used to build the comment docstring.  Each 
 518      # element is a tuple (comment_text, comment_lineno). 
 519      comments = [] 
 520   
 521      # A list of decorator lines that occur before the current 
 522      # logical line.  This is used so we can process a function 
 523      # declaration line and its decorators all at once. 
 524      decorators = [] 
 525   
 526      # A list of group names, one for each indentation level.  This is 
 527      # used to keep track groups that are defined by comment markers 
 528      # START_GROUP_MARKER and END_GROUP_MARKER. 
 529      groups = [None] 
 530   
 531      # When we encounter a comment start group marker, set this to the 
 532      # name of the group; but wait until we're ready to process the 
 533      # next line before we actually set groups[-1] to this value.  This 
 534      # is necessary because at the top of a block, the tokenizer gives 
 535      # us comments before the INDENT token; but if we encounter a group 
 536      # start marker at the top of a block, then we want it to apply 
 537      # inside that block, not outside it. 
 538      start_group = None 
 539   
 540      # Check if the source file declares an encoding. 
 541      encoding = get_module_encoding(module_doc.filename) 
 542   
 543      # The token-eating loop: 
 544      try: 
 545          module_file = codecs.open(module_doc.filename, 'rU', encoding) 
 546      except LookupError: 
 547          log.warning("Unknown encoding %r for %s; using the default" 
 548                      "encoding instead (iso-8859-1)" % 
 549                      (encoding, module_doc.filename)) 
 550          encoding = 'iso-8859-1' 
 551          module_file = codecs.open(module_doc.filename, 'rU', encoding) 
 552      tok_iter = tokenize.generate_tokens(module_file.readline) 
 553      for toktype, toktext, (srow,scol), (erow,ecol), line_str in tok_iter: 
 554          # BOM encoding marker: ignore. 
 555          if toktype == token.ERRORTOKEN: 
 556              log.debug(type(toktext), `toktext`) 
 557          if (toktype == token.ERRORTOKEN and 
 558              (toktext == u'\ufeff' or 
 559               toktext.encode(encoding) == '\xef\xbb\xbf')): 
 560              pass 
 561               
 562          # Error token: abort 
 563          elif toktype == token.ERRORTOKEN: 
 564              raise ParseError('Error during parsing: invalid syntax ' 
 565                               '(%s, line %d, char %d: %r)' % 
 566                               (module_doc.filename, srow, scol, toktext)) 
 567           
 568          # Indent token: update the parent_doc stack. 
 569          elif toktype == token.INDENT: 
 570              if prev_line_doc is None: 
 571                  parent_docs.append(parent_docs[-1]) 
 572              else: 
 573                  parent_docs.append(prev_line_doc) 
 574              groups.append(None) 
 575                   
 576          # Dedent token: update the parent_doc stack. 
 577          elif toktype == token.DEDENT: 
 578              if line_toks == []: 
 579                  parent_docs.pop() 
 580                  groups.pop() 
 581              else: 
 582                  # This *should* only happen if the file ends on an 
 583                  # indented line, with no final newline. 
 584                  # (otherwise, this is the wrong thing to do.) 
 585                  pass 
 586               
 587          # Line-internal newline token: if we're still at the start of 
 588          # the logical line, and we've seen one or more comment lines, 
 589          # then discard them: blank lines are not allowed between a 
 590          # comment block and the thing it describes. 
 591          elif toktype == tokenize.NL: 
 592              if comments and not line_toks: 
 593                  log.warning('Ignoring docstring comment block followed by ' 
 594                              'a blank line in %r on line %r' % 
 595                              (module_doc.filename, srow-1)) 
 596                  comments = [] 
 597                   
 598          # Comment token: add to comments if appropriate. 
 599          elif toktype == tokenize.COMMENT: 
 600              if toktext.startswith(COMMENT_DOCSTRING_MARKER): 
 601                  comment_line = toktext[len(COMMENT_DOCSTRING_MARKER):].rstrip() 
 602                  comments.append( [comment_line, srow]) 
 603              elif toktext.startswith(START_GROUP_MARKER): 
 604                  start_group = toktext[len(START_GROUP_MARKER):].strip() 
 605              elif toktext.startswith(END_GROUP_MARKER): 
 606                  for i in range(len(groups)-1, -1, -1): 
 607                      if groups[i]: 
 608                          groups[i] = None 
 609                          break 
 610                  else: 
 611                      log.warning("Got group end marker without a corresponding " 
 612                                  "start marker in %r on line %r" %  
 613                                  (module_doc.filename, srow)) 
 614               
 615          # Normal token: Add it to line_toks.  (If it's a non-unicode 
 616          # string literal, then we need to re-encode using the file's 
 617          # encoding, to get back to the original 8-bit data; and then 
 618          # convert that string with 8-bit data to a 7-bit ascii 
 619          # representation.) 
 620          elif toktype != token.NEWLINE and toktype != token.ENDMARKER: 
 621              if lineno is None: lineno = srow 
 622              if toktype == token.STRING: 
 623                  str_prefixes = re.match('[^\'"]*', toktext).group() 
 624                  if 'u' not in str_prefixes: 
 625                      s = toktext.encode(encoding) 
 626                      toktext = decode_with_backslashreplace(s) 
 627              line_toks.append( (toktype, toktext) ) 
 628               
 629          # Decorator line: add it to the decorators list. 
 630          elif line_toks and line_toks[0] == (token.OP, '@'): 
 631              decorators.append(shallow_parse(line_toks)) 
 632              line_toks = [] 
 633   
 634          # End of line token, but nothing to do. 
 635          elif line_toks == []: 
 636              pass 
 637               
 638          # End of line token: parse the logical line & process it. 
 639          else: 
 640              if start_group: 
 641                  groups[-1] = start_group 
 642                  start_group = None 
 643   
 644              if parent_docs[-1] != 'skip_block': 
 645                  try: 
 646                      prev_line_doc = process_line( 
 647                          shallow_parse(line_toks), parent_docs, prev_line_doc,  
 648                          lineno, comments, decorators, encoding) 
 649                  except ParseError, e: 
 650                      raise ParseError('Error during parsing: invalid ' 
 651                                       'syntax (%s, line %d) -- %s' % 
 652                                       (module_doc.filename, lineno, e)) 
 653   
 654                  # grouping... 
 655                  if groups[-1] and prev_line_doc not in (None, 'skip_block'): 
 656                      if isinstance(prev_line_doc, VariableDoc): 
 657                          # This special case is needed for inst vars, where 
 658                          # parent_docs[-1] is the __init__ function, not the 
 659                          # containing class: 
 660                          add_to_group(prev_line_doc.container, 
 661                                       prev_line_doc, groups[-1]) 
 662                      elif isinstance(parent_docs[-1], NamespaceDoc): 
 663                          add_to_group(parent_docs[-1], prev_line_doc, 
 664                                       groups[-1]) 
 665              else: 
 666                  prev_line_doc = None 
 667   
 668              # Reset line contents. 
 669              line_toks = [] 
 670              lineno = None 
 671              comments = [] 
 672               
 673 -def add_to_group(container, api_doc, group_name): 
 674      if container.group_specs == UNKNOWN: 
 675          container.group_specs = [] 
 676   
 677      if isinstance(api_doc, VariableDoc): 
 678          var_name = api_doc.name 
 679      else: 
 680          var_name = api_doc.canonical_name[-1] 
 681   
 682      for (name, group_vars) in container.group_specs: 
 683          if name == group_name: 
 684              group_vars.append(var_name) 
 685              return 
 686      else: 
 687          container.group_specs.append( (group_name, [var_name]) ) 
 688   
 689  #///////////////////////////////////////////////////////////////// 
 690  #{ Shallow parser 
 691  #///////////////////////////////////////////////////////////////// 
 692   
 693 -def shallow_parse(line_toks): 
 694      """ 
 695      Given a flat list of tokens, return a nested tree structure 
 696      (called a X{token tree}), whose leaves are identical to the 
 697      original list, but whose structure reflects the structure 
 698      implied by the grouping tokens (i.e., parenthases, braces, and 
 699      brackets).  If the parenthases, braces, and brackets do not 
 700      match, or are not balanced, then raise a ParseError. 
 701       
 702      Assign some structure to a sequence of structure (group parens). 
 703      """ 
 704      stack = [[]] 
 705      parens = [] 
 706      for tok in line_toks: 
 707          toktype, toktext = tok 
 708          if toktext in ('(','[','{'): 
 709              parens.append(tok) 
 710              stack.append([tok]) 
 711          elif toktext in ('}',']',')'): 
 712              if not parens: 
 713                  raise ParseError('Unbalanced parens') 
 714              left_paren = parens.pop()[1] 
 715              if left_paren+toktext not in ('()', '[]', '{}'): 
 716                  raise ParseError('Mismatched parens') 
 717              lst = stack.pop() 
 718              lst.append(tok) 
 719              stack[-1].append(lst) 
 720          else: 
 721              stack[-1].append(tok) 
 722      if len(stack) != 1 or len(parens) != 0: 
 723          raise ParseError('Unbalanced parens') 
 724      return stack[0] 
 725   
 726  #///////////////////////////////////////////////////////////////// 
 727  #{ Line processing 
 728  #///////////////////////////////////////////////////////////////// 
 729  # The methods process_*() are used to handle lines. 
 730   
 731 -def process_line(line, parent_docs, prev_line_doc, lineno, 
 732                   comments, decorators, encoding): 
 733      """ 
 734      @return: C{new-doc}, C{decorator}..? 
 735      """ 
 736      args = (line, parent_docs, prev_line_doc, lineno, 
 737              comments, decorators, encoding) 
 738   
 739      if not line: # blank line. 
 740          return None 
 741      elif (token.OP, ':') in line[:-1]: 
 742          return process_one_line_block(*args) 
 743      elif (token.OP, ';') in line: 
 744          return process_multi_stmt(*args) 
 745      elif line[0] == (token.NAME, 'def'): 
 746          return process_funcdef(*args) 
 747      elif line[0] == (token.OP, '@'): 
 748          return process_funcdef(*args) 
 749      elif line[0] == (token.NAME, 'class'): 
 750          return process_classdef(*args) 
 751      elif line[0] == (token.NAME, 'import'): 
 752          return process_import(*args) 
 753      elif line[0] == (token.NAME, 'from'): 
 754          return process_from_import(*args) 
 755      elif line[0] == (token.NAME, 'del'): 
 756          return process_del(*args) 
 757      elif len(line)==1 and line[0][0] == token.STRING: 
 758          return process_docstring(*args) 
 759      elif (token.OP, '=') in line: 
 760          return process_assignment(*args) 
 761      elif (line[0][0] == token.NAME and 
 762            line[0][1] in CONTROL_FLOW_KEYWORDS): 
 763          return process_control_flow_line(*args) 
 764      else: 
 765          return None 
 766          # [xx] do something with control structures like for/if? 
 767   
 768  #///////////////////////////////////////////////////////////////// 
 769  # Line handler: control flow 
 770  #///////////////////////////////////////////////////////////////// 
 771   
 772  CONTROL_FLOW_KEYWORDS = [ 
 773      #: A list of the control flow keywords.  If a line begins with 
 774      #: one of these keywords, then it should be handled by 
 775      #: C{process_control_flow_line}. 
 776      'if', 'elif', 'else', 'while', 'for', 'try', 'except', 'finally'] 
 777   
 778 -def process_control_flow_line(line, parent_docs, prev_line_doc, 
 779                                lineno, comments, decorators, encoding): 
 780      keyword = line[0][1] 
 781   
 782      # If it's a 'for' block: create the loop variable. 
 783      if keyword == 'for' and PARSE_FOR_BLOCKS: 
 784          loopvar_name = parse_dotted_name( 
 785              split_on(line[1:], (token.NAME, 'in'))[0]) 
 786          parent = get_lhs_parent(loopvar_name, parent_docs) 
 787          if parent is not None: 
 788              var_doc = VariableDoc(name=loopvar_name[-1], is_alias=False,  
 789                                    is_imported=False, is_instvar=False, 
 790                                    docs_extracted_by='parser') 
 791              set_variable(parent, var_doc) 
 792       
 793      if ((keyword == 'if' and PARSE_IF_BLOCKS) or 
 794          (keyword == 'elif' and PARSE_ELSE_BLOCKS) or 
 795          (keyword == 'else' and PARSE_ELSE_BLOCKS) or 
 796          (keyword == 'while' and PARSE_WHILE_BLOCKS) or 
 797          (keyword == 'for' and PARSE_FOR_BLOCKS) or 
 798          (keyword == 'try' and PARSE_TRY_BLOCKS) or 
 799          (keyword == 'except' and PARSE_EXCEPT_BLOCKS) or 
 800          (keyword == 'finally' and PARSE_FINALLY_BLOCKS)): 
 801          # Return "None" to indicate that we should process the 
 802          # block using the same context that we were already in. 
 803          return None 
 804      else: 
 805          # Return 'skip_block' to indicate that we should ignore 
 806          # the contents of this block. 
 807          return 'skip_block' 
 808   
 809  #///////////////////////////////////////////////////////////////// 
 810  # Line handler: imports 
 811  #///////////////////////////////////////////////////////////////// 
 812  # [xx] I could optionally add ValueDoc's for the imported 
 813  # variables with proxy_for set to the imported source; but 
 814  # I don't think I gain much of anything by doing so. 
 815   
 816 -def process_import(line, parent_docs, prev_line_doc, lineno, 
 817                     comments, decorators, encoding): 
 818      if not isinstance(parent_docs[-1], NamespaceDoc): return 
 819       
 820      names = split_on(line[1:], (token.OP, ',')) 
 821       
 822      for name in names: 
 823          name_pieces = split_on(name, (token.NAME, 'as')) 
 824          if len(name_pieces) == 1: 
 825              src_name = parse_dotted_name(name_pieces[0]) 
 826              _import_var(src_name, parent_docs) 
 827          elif len(name_pieces) == 2: 
 828              if len(name_pieces[1]) != 1: 
 829                  raise ParseError('Expected identifier after "as"') 
 830              src_name = parse_dotted_name(name_pieces[0]) 
 831              var_name = parse_name(name_pieces[1][0]) 
 832              _import_var_as(src_name, var_name, parent_docs) 
 833          else: 
 834              raise ParseError('Multiple "as" tokens in import') 
 835   
 836 -def process_from_import(line, parent_docs, prev_line_doc, lineno, 
 837                          comments, decorators, encoding): 
 838      if not isinstance(parent_docs[-1], NamespaceDoc): return 
 839       
 840      pieces = split_on(line[1:], (token.NAME, 'import')) 
 841      if len(pieces) != 2 or not pieces[0] or not pieces[1]: 
 842          raise ParseError("Bad from-import") 
 843      lhs, rhs = pieces 
 844   
 845      # The RHS might be parenthasized, as specified by PEP 328: 
 846      # http://www.python.org/peps/pep-0328.html 
 847      if (len(rhs) == 1 and isinstance(rhs[0], list) and 
 848          rhs[0][0] == (token.OP, '(') and rhs[0][-1] == (token.OP, ')')): 
 849          rhs = rhs[0][1:-1] 
 850   
 851      # >>> from __future__ import nested_scopes 
 852      if lhs == [(token.NAME, '__future__')]: 
 853          return 
 854   
 855      # >>> from sys import * 
 856      elif rhs == [(token.OP, '*')]: 
 857          src_name = parse_dotted_name(lhs) 
 858          _process_fromstar_import(src_name, parent_docs) 
 859   
 860      # >>> from os.path import join, split 
 861      else: 
 862          src_name = parse_dotted_name(lhs) 
 863          for elt in rhs: 
 864              if elt != (token.OP, ','): 
 865                  var_name = parse_name(elt) 
 866                  _import_var_as(DottedName(src_name, var_name), 
 867                                      var_name, parent_docs) 
 868       
 869 -def _process_fromstar_import(src, parent_docs): 
 870      """ 
 871      Handle a statement of the form: 
 872          >>> from <src> import * 
 873   
 874      If L{IMPORT_HANDLING} is C{'parse'}, then first try to parse 
 875      the module C{M{<src>}}, and copy all of its exported variables 
 876      to C{parent_docs[-1]}. 
 877   
 878      Otherwise, try to determine the names of the variables exported by 
 879      C{M{<src>}}, and create a new variable for each export.  If 
 880      L{IMPORT_STAR_HANDLING} is C{'parse'}, then the list of exports if 
 881      found by parsing C{M{<src>}}; if it is C{'introspect'}, then the 
 882      list of exports is found by importing and introspecting 
 883      C{M{<src>}}. 
 884      """ 
 885      # Record the import 
 886      parent_docs[0].imports.append(src) # mark that it's .*?? 
 887       
 888      if not isinstance(parent_docs[-1], NamespaceDoc): return 
 889       
 890      # If src is package-local, then convert it to a global name. 
 891      src = _global_name(src, parent_docs) 
 892   
 893      # [xx] add check for if we already have the source docs in our 
 894      # cache?? 
 895   
 896      if (IMPORT_HANDLING == 'parse' or 
 897          IMPORT_STAR_HANDLING == 'parse'): # [xx] is this ok? 
 898          try: module_doc = _find(src) 
 899          except ImportError: module_doc = None 
 900          if isinstance(module_doc, ModuleDoc): 
 901              for name, imp_var in module_doc.variables.items(): 
 902                  # [xx] this is not exactly correct, but close.  It 
 903                  # does the wrong thing if a __var__ is explicitly 
 904                  # listed in __all__. 
 905                  if (imp_var.is_public and 
 906                      not (name.startswith('__') and name.endswith('__'))): 
 907                      var_doc = _add_import_var(DottedName(src, name), name, 
 908                                                parent_docs[-1]) 
 909                      if IMPORT_HANDLING == 'parse': 
 910                          var_doc.value = imp_var.value 
 911   
 912      # If we got here, then either IMPORT_HANDLING='link' or we 
 913      # failed to parse the `src` module. 
 914      if IMPORT_STAR_HANDLING == 'introspect': 
 915          try: module = __import__(str(src), {}, {}, [0]) 
 916          except: return # We couldn't import it. 
 917          if module is None: return # We couldn't import it. 
 918          if hasattr(module, '__all__'): 
 919              names = list(module.__all__) 
 920          else: 
 921              names = [n for n in dir(module) if not n.startswith('_')] 
 922          for name in names: 
 923              _add_import_var(DottedName(src, name), name, parent_docs[-1]) 
 924   
 925 -def _import_var(name, parent_docs): 
 926      """ 
 927      Handle a statement of the form: 
 928          >>> import <name> 
 929   
 930      If L{IMPORT_HANDLING} is C{'parse'}, then first try to find 
 931      the value by parsing; and create an appropriate variable in 
 932      parentdoc. 
 933   
 934      Otherwise, add a variable for the imported variable.  (More than 
 935      one variable may be created for cases like C{'import a.b'}, where 
 936      we need to create a variable C{'a'} in parentdoc containing a 
 937      proxy module; and a variable C{'b'} in the proxy module. 
 938      """ 
 939      # Record the import 
 940      parent_docs[0].imports.append(name) 
 941       
 942      if not isinstance(parent_docs[-1], NamespaceDoc): return 
 943       
 944      # If name is package-local, then convert it to a global name. 
 945      src = _global_name(name, parent_docs) 
 946      src_prefix = src[:len(src)-len(name)] 
 947   
 948      # [xx] add check for if we already have the source docs in our 
 949      # cache?? 
 950   
 951      if IMPORT_HANDLING == 'parse': 
 952          # Check to make sure that we can actually find the value. 
 953          try: val_doc = _find(src) 
 954          except ImportError: val_doc = None 
 955          if val_doc is not None: 
 956              # We found it; but it's not the value itself we want to 
 957              # import, but the module containing it; so import that 
 958              # module (=top_mod) and create a variable for it. 
 959              top_mod = src_prefix+name[0] 
 960              var_doc = _add_import_var(top_mod, name[0], parent_docs[-1]) 
 961              var_doc.value = _find(DottedName(name[0])) 
 962              return 
 963   
 964      # If we got here, then either IMPORT_HANDLING='link', or we 
 965      # did not successfully find the value's docs by parsing; use 
 966      # a variable with an UNKNOWN value. 
 967       
 968      # Create any necessary intermediate proxy module values. 
 969      container = parent_docs[-1] 
 970      for i, identifier in enumerate(name[:-1]): 
 971          if (identifier not in container.variables or 
 972              not isinstance(container.variables[identifier], ModuleDoc)): 
 973              var_doc = _add_import_var(name[:i+1], identifier, container) 
 974              var_doc.value = ModuleDoc(variables={}, sort_spec=[], 
 975                                        proxy_for=src_prefix+name[:i+1], 
 976                                        submodules={},  
 977                                        docs_extracted_by='parser') 
 978          container = container.variables[identifier].value 
 979   
 980      # Add the variable to the container. 
 981      _add_import_var(src, name[-1], container) 
 982   
 983 -def _import_var_as(src, name, parent_docs): 
 984      """ 
 985      Handle a statement of the form: 
 986          >>> import src as name 
 987           
 988      If L{IMPORT_HANDLING} is C{'parse'}, then first try to find 
 989      the value by parsing; and create an appropriate variable in 
 990      parentdoc. 
 991   
 992      Otherwise, create a variables with its C{imported_from} attribute 
 993      pointing to the imported object. 
 994      """ 
 995      # Record the import 
 996      parent_docs[0].imports.append(src) 
 997       
 998      if not isinstance(parent_docs[-1], NamespaceDoc): return 
 999       
1000      # If src is package-local, then convert it to a global name. 
1001      src = _global_name(src, parent_docs) 
1002       
1003      if IMPORT_HANDLING == 'parse': 
1004          # Parse the value and create a variable for it. 
1005          try: val_doc = _find(src) 
1006          except ImportError: val_doc = None 
1007          if val_doc is not None: 
1008              var_doc = VariableDoc(name=name, value=val_doc, 
1009                                    is_imported=True, is_alias=False, 
1010                                    imported_from=src, 
1011                                    docs_extracted_by='parser') 
1012              set_variable(parent_docs[-1], var_doc) 
1013              return 
1014   
1015      # If we got here, then either IMPORT_HANDLING='link', or we 
1016      # did not successfully find the value's docs by parsing; use a 
1017      # variable with a proxy value. 
1018      _add_import_var(src, name, parent_docs[-1]) 
1019   
def _add_import_var(src, name, container):
    """
    Create a C{VariableDoc} named C{name} that is marked as imported
    from C{src} (and not as an alias), and add it to C{container}
    with L{set_variable}.

    @return: The new C{VariableDoc}.
    """
    imported_var = VariableDoc(name=name, imported_from=src,
                               is_imported=True, is_alias=False,
                               docs_extracted_by='parser')
    set_variable(container, imported_var)
    return imported_var
1029   
def _global_name(name, parent_docs):
    """
    If the given name is package-local (relative to the current
    context, as determined by C{parent_docs}), then convert it
    to a global name.

    The search starts with the package for C{parent_docs[0]} (which
    is C{parent_docs[0]} itself, if it is a package), and moves
    outwards through the containing packages.  The first package in
    which C{imp.find_module} finds a module named C{name[0]} is used
    as the prefix of the result.

    @return: The canonical name of that package, followed by
        C{name}; or C{name} itself, if no such package is found.
    """
    # Get the containing package from parent_docs.
    module_doc = parent_docs[0]
    if module_doc.is_package:
        package = module_doc
    else:
        package = module_doc.package

    # Check each package, from closest to furthest.
    while package not in (None, UNKNOWN):
        try:
            fp = imp.find_module(name[0], package.path)[0]
            # find_module may return an open file; we don't need it.
            if fp is not None: fp.close()
        except ImportError:
            # No submodule found here; try the next package up.
            package = package.package
        else:
            # A submodule was found; `name` is relative to this
            # package.
            return package.canonical_name + name

    # We didn't find any package containing `name`; so just return
    # `name` as-is.
    return name
1059   
1060  #///////////////////////////////////////////////////////////////// 
1061  # Line handler: assignment 
1062  #///////////////////////////////////////////////////////////////// 
1063   
def process_assignment(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for assignment statements, such as:

        >>> x = y = 12

    The line is split on its top-level C{=} tokens: the last piece
    is the right-hand side, and every other piece is a left-hand
    side.  The left-hand sides are processed from right to left; all
    but the rightmost one are marked as aliases.  A left-hand side
    that is a dotted name gets a C{VariableDoc} whose value is the
    right-hand side's value; any other left-hand side gets
    C{VariableDoc}s without a value for the dotted names that
    L{dotted_names_in} finds in it.

    Instance variables (see L{lhs_is_instvar}) are given an
    C{UNKNOWN} value, and are only added to their class here if the
    assignment has a comment docstring.  Assignments that are
    neither instance variables nor located directly in a namespace
    are ignored.

    @return: The new C{VariableDoc}, if the statement has a single
        left-hand side which is a dotted name, and which is either a
        single identifier or an instance variable; otherwise,
        C{None}.
    """
    # Divide the assignment statement into its pieces.
    pieces = split_on(line, (token.OP, '='))

    lhs_pieces = pieces[:-1]
    rhs = pieces[-1]
    single_lhs = (len(lhs_pieces) == 1)

    # Decide whether the variable is an instance variable or not.
    # If it's an instance var, then discard the value.
    is_instvar = lhs_is_instvar(lhs_pieces, parent_docs)

    # if it's not an instance var, and we're not in a namespace,
    # then it's just a local var -- so ignore it.
    if not (is_instvar or isinstance(parent_docs[-1], NamespaceDoc)):
        return None

    # Evaluate the right hand side.
    if not is_instvar:
        rhs_val, is_alias = rhs_to_valuedoc(rhs, parent_docs)
    else:
        rhs_val, is_alias = UNKNOWN, False

    # Assign the right hand side value to each left hand side.
    # (Do the rightmost assignment first)
    lhs_pieces.reverse()
    for lhs in lhs_pieces:
        # Try treating the LHS as a simple dotted name.  Any failure
        # to parse it just means that it's a complex expression.
        try: lhs_name = parse_dotted_name(lhs)
        except Exception: lhs_name = None
        if lhs_name is not None:
            lhs_parent = get_lhs_parent(lhs_name, parent_docs)
            if lhs_parent is None: continue
            # Create the VariableDoc.
            var_doc = VariableDoc(name=lhs_name[-1], value=rhs_val,
                                  is_imported=False, is_alias=is_alias,
                                  is_instvar=is_instvar,
                                  docs_extracted_by='parser')
            # Extract a docstring from the comments, when present,
            # but only if there's a single LHS.
            if single_lhs:
                add_docstring_from_comments(var_doc, comments)

            # Assign the variable to the containing namespace,
            # *unless* the variable is an instance variable
            # without a comment docstring.  In that case, we'll
            # only want to add it if we later discover that it's
            # followed by a variable docstring.  If it is, then
            # process_docstring will take care of adding it to the
            # containing class.  (This is a little hackish, but
            # unfortunately is necessary because we won't know if
            # this assignment line is followed by a docstring
            # until later.)
            if (not is_instvar) or comments:
                set_variable(lhs_parent, var_doc, True)

            # If it's the only var, then return the VarDoc for use
            # as the new `prev_line_doc`.
            if single_lhs and (len(lhs_name) == 1 or is_instvar):
                return var_doc

        # Otherwise, the LHS must be a complex expression; use
        # dotted_names_in() to decide what variables it contains,
        # and create VariableDoc's for all of them (with UNKNOWN
        # value).
        else:
            # Only the current LHS is examined (wrapped in a fresh
            # list, since dotted_names_in expects a list of
            # expressions).  Examining every LHS here would create
            # second, value-less variables for the simple names in
            # a chained assignment such as `(a, b) = c = f()`.
            for lhs_name in dotted_names_in([lhs]):
                lhs_parent = get_lhs_parent(lhs_name, parent_docs)
                if lhs_parent is None: continue
                var_doc = VariableDoc(name=lhs_name[-1],
                                      is_imported=False,
                                      is_alias=is_alias,
                                      is_instvar=is_instvar,
                                      docs_extracted_by='parser')
                set_variable(lhs_parent, var_doc, True)

        # If we have multiple left-hand-sides, then all but the
        # rightmost one are considered aliases.
        is_alias = True
1144           
1145   
def lhs_is_instvar(lhs_pieces, parent_docs):
    """
    Decide whether an assignment's left-hand sides describe an
    instance variable.  This is the case if all of the following
    hold:

      - C{parent_docs[-1]} is a C{RoutineDoc} with at least one
        positional argument.
      - There is exactly one left-hand side, and it consists of the
        three tokens C{self.name}, where C{self} is the routine's
        first positional argument and C{name} is a name token.
      - Going backwards through C{parent_docs}, and skipping the
        entries that are equal to C{parent_docs[-1]}, the first
        entry is a C{ClassDoc} (i.e., the routine is defined
        directly in a class, and is not a module-level function).

    @return: C{True} if the left-hand side is an instance variable;
        C{False} otherwise.
    """
    routine = parent_docs[-1]
    if not isinstance(routine, RoutineDoc):
        return False

    # The lhs must have the form <self>.<name>.
    posargs = routine.posargs
    if len(lhs_pieces) != 1 or len(posargs) == 0:
        return False
    target = lhs_pieces[0]
    if len(target) != 3:
        return False
    if target[0] != (token.NAME, posargs[0]):
        return False
    if target[1] != (token.OP, '.'):
        return False
    if target[2][0] != token.NAME:
        return False

    # Make sure we're in an instance method, and not a
    # module-level function.
    for doc in parent_docs[::-1]:
        if isinstance(doc, ClassDoc):
            return True
        if doc != routine:
            return False
    return False
1167           
def rhs_to_valuedoc(rhs, parent_docs):
    """
    Convert the token tree for the right-hand side of an assignment
    into a value doc.  Three cases are tried, in order:

      1. C{rhs} is a dotted name for which L{lookup_value} finds a
         value: that value is returned, and is marked as an alias.
      2. C{rhs} has the form C{name(arg)}, where C{arg} is converted
         (recursively) to a C{RoutineDoc}: the result of applying
         C{name} as a decorator to that routine is returned.
      3. Anything else: a new C{GenericValueDoc} is returned, with
         the source code of C{rhs} as its C{parse_repr}.

    @return: A tuple C{(value_doc, is_alias)}.
    """
    # Dotted variable:
    try:
        rhs_name = parse_dotted_name(rhs)
        rhs_val = lookup_value(rhs_name, parent_docs)
        if rhs_val is not None:
            return rhs_val, True
    except ParseError:
        pass

    # Decorators: a name followed by a parenthesized group.  (The
    # group must be checked for an opening parenthesis, because a
    # subscript such as `table[func]` has the same shape in the
    # token tree, but is not a call.)
    if (len(rhs)==2 and rhs[0][0] == token.NAME and
        isinstance(rhs[1], list) and rhs[1] and
        rhs[1][0] == (token.OP, '(')):
        arg_val, _ = rhs_to_valuedoc(rhs[1][1:-1], parent_docs)
        if isinstance(arg_val, RoutineDoc):
            doc = apply_decorator(DottedName(rhs[0][1]), arg_val)
            doc.canonical_name = UNKNOWN
            doc.parse_repr = pp_toktree(rhs)
            return doc, False

    # Nothing else to do: make a val with the source as its repr.
    return GenericValueDoc(parse_repr=pp_toktree(rhs), toktree=rhs,
                           defining_module=parent_docs[0],
                           docs_extracted_by='parser'), False
1192   
def get_lhs_parent(lhs_name, parent_docs):
    """
    Find the doc that should contain the variable that is being
    assigned to by the name C{lhs_name}.

      - If C{parent_docs[-1]} is a C{RoutineDoc}, then it is the
        closest C{ClassDoc} in C{parent_docs}.
      - Otherwise, if C{lhs_name} is a single identifier, then it is
        C{parent_docs[-1]}.
      - Otherwise, it is whatever L{lookup_value} returns for the
        container of C{lhs_name}.

    @param lhs_name: A C{DottedName}.
    @raise ValueError: If C{parent_docs[-1]} is a C{RoutineDoc}, but
        C{parent_docs} contains no C{ClassDoc}.
    """
    assert isinstance(lhs_name, DottedName)

    innermost = parent_docs[-1]

    # For instance vars inside an __init__ method:
    if isinstance(innermost, RoutineDoc):
        for doc in parent_docs[::-1]:
            if isinstance(doc, ClassDoc):
                return doc
        raise ValueError("%r is not a namespace or method" %
                         innermost)

    # For local variables:
    if len(lhs_name) == 1:
        return innermost

    # For non-local variables:
    return lookup_value(lhs_name.container(), parent_docs)
1211   
1212  #///////////////////////////////////////////////////////////////// 
1213  # Line handler: single-line blocks 
1214  #///////////////////////////////////////////////////////////////// 
1215   
def process_one_line_block(line, parent_docs, prev_line_doc, lineno,
                           comments, decorators, encoding):
    """
    The line handler for single-line blocks, such as:

        >>> def f(x): return x*2

    This handler calls L{process_line} twice: once for the tokens
    up to and including the first colon, and once for the remaining
    tokens.  The second call is made with the result of the first
    call appended to C{parent_docs}, and passed as C{prev_line_doc}.
    The comment docstring and the decorators are applied to the
    first part only.

    @return: The value returned by L{process_line} for the first
        part of the line.  (The result for the second part is
        discarded.)
    """
    colon = line.index((token.OP, ':'))

    # The block header.
    header_doc = process_line(line[:colon+1], parent_docs, prev_line_doc,
                              lineno, comments, decorators, encoding)

    # The block body, processed with the header as its parent.
    process_line(line[colon+1:], parent_docs+[header_doc],
                 header_doc, lineno, None, [], encoding)

    return header_doc
1235   
1236  #///////////////////////////////////////////////////////////////// 
1237  # Line handler: semicolon-separated statements 
1238  #///////////////////////////////////////////////////////////////// 
1239   
def process_multi_stmt(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for semicolon-separated statements, such as:

        >>> x=1; y=2; z=3

    This handler calls L{process_line} once for each non-empty
    statement.  The result for each statement is used as the
    C{prev_line_doc} of the next one.  The comment docstring is not
    passed on to any of the sub-statements, and the decorators are
    only passed on to the first non-empty one.
    @return: C{None}
    """
    for statement in split_on(line, (token.OP, ';')):
        if statement:
            prev_line_doc = process_line(statement, parent_docs,
                                         prev_line_doc, lineno, None,
                                         decorators, encoding)
            # The decorators have been handed out.
            decorators = []
    return None
1259   
1260  #///////////////////////////////////////////////////////////////// 
1261  # Line handler: delete statements 
1262  #///////////////////////////////////////////////////////////////// 
1263   
def process_del(line, parent_docs, prev_line_doc, lineno,
                comments, decorators, encoding):
    """
    The line handler for delete statements, such as:

        >>> del x, y.z

    This handler splits the tokens after C{del} on commas, and calls
    L{del_variable} on C{parent_docs[-1]} for each name that
    L{dotted_names_in} returns for them.  Nothing is done if
    C{parent_docs[-1]} is not a C{NamespaceDoc}.
    @return: C{None}
    """
    # If we're not in a namespace, then ignore it.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    targets = split_on(line[1:], (token.OP, ','))
    for var_name in dotted_names_in(targets):
        del_variable(namespace, var_name)

    return None
1285   
1286  #///////////////////////////////////////////////////////////////// 
1287  # Line handler: docstrings 
1288  #///////////////////////////////////////////////////////////////// 
1289   
def process_docstring(line, parent_docs, prev_line_doc, lineno,
                      comments, decorators, encoding):
    """
    The line handler for bare string literals.  If C{prev_line_doc}
    is an C{APIDoc}, then the string literal becomes its docstring,
    and C{lineno} becomes its C{docstring_lineno}.  If it already
    has a docstring (from comment docstrings), then a warning is
    logged, and the old docstring is replaced by the new one.

    Nothing is done if C{prev_line_doc} is C{None}, or if it is the
    C{'skip_block'} marker (which is what L{process_one_line_block}
    passes on after a control flow line whose block is skipped).

    @param encoding: The encoding that is used to decode the
        docstring, if it is a C{str}.
    """
    # There's nothing that the string could document.  (Without the
    # 'skip_block' check, the attribute accesses below would fail on
    # the marker string.)
    if prev_line_doc is None or prev_line_doc == 'skip_block':
        return
    docstring = parse_string(line)

    # If the docstring is a str, then convert it to unicode.
    # According to a strict reading of PEP 263, this might not be the
    # right thing to do; but it will almost always be what the
    # module's author intended.
    if isinstance(docstring, str):
        docstring = docstring.decode(encoding)

    # If the modified APIDoc is an instance variable, and it has
    # not yet been added to its class's C{variables} list,
    # then add it now.  This is done here, rather than in the
    # process_assignment() call that created the variable, because
    # we only want to add instance variables if they have an
    # associated docstring.  (For more info, see the comment above
    # the set_variable() call in process_assignment().)
    if (isinstance(prev_line_doc, VariableDoc) and
        prev_line_doc.is_instvar and
        prev_line_doc.docstring in (None, UNKNOWN)):
        for i in range(len(parent_docs)-1, -1, -1):
            if isinstance(parent_docs[i], ClassDoc):
                set_variable(parent_docs[i], prev_line_doc, True)
                break

    if prev_line_doc.docstring not in (None, UNKNOWN):
        log.warning("%s has both a comment-docstring and a normal "
                    "(string) docstring; ignoring the comment-"
                    "docstring." % prev_line_doc.canonical_name)

    prev_line_doc.docstring = docstring
    prev_line_doc.docstring_lineno = lineno
1331   
1332       
1333  #///////////////////////////////////////////////////////////////// 
1334  # Line handler: function declarations 
1335  #///////////////////////////////////////////////////////////////// 
1336   
def process_funcdef(line, parent_docs, prev_line_doc, lineno,
                    comments, decorators, encoding):
    """
    The line handler for function declaration lines, such as:

        >>> def f(a, b=22, (c,d)):

    This handler creates and initializes a new C{VariableDoc}
    containing a C{RoutineDoc}, adds the C{VariableDoc} to the
    containing namespace, and returns the C{RoutineDoc}.  Nothing is
    created if C{parent_docs[-1]} is not a C{NamespaceDoc}.

    @param decorators: The token trees of the decorator lines that
        precede the function, in source order.  They are applied
        starting with the last one.  This list is not modified.
    @return: The doc for the function, after the decorators have
        been applied to it; or C{None} if we're not in a namespace.
    @raise ParseError: If C{line} does not consist of exactly four
        elements, the last of which is a colon.
    """
    # Check syntax.
    if len(line) != 4 or line[3] != (token.OP, ':'):
        raise ParseError("Bad function definition line")

    # If we're not in a namespace, then ignore it.
    parent_doc = parent_docs[-1]
    if not isinstance(parent_doc, NamespaceDoc): return

    # Get the function's name
    func_name = parse_name(line[1])
    canonical_name = DottedName(parent_doc.canonical_name, func_name)

    # Create the function's RoutineDoc.
    func_doc = RoutineDoc(canonical_name=canonical_name,
                          defining_module=parent_docs[0],
                          lineno=lineno, docs_extracted_by='parser')

    # Process the signature.
    init_arglist(func_doc, line[2])

    # If the preceding comment includes a docstring, then add it.
    add_docstring_from_comments(func_doc, comments)

    # Apply any decorators, innermost (i.e., last listed) first.  We
    # iterate over a reversed copy, rather than reversing the list in
    # place, because the list belongs to the caller.
    for decorator in decorators[::-1]:
        try:
            deco_name = parse_dotted_name(decorator[1:])
        except ParseError:
            deco_name = None
        if func_doc.canonical_name is not UNKNOWN:
            deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]),
                                    func_doc.canonical_name)
        elif func_doc.parse_repr not in (None, UNKNOWN):
            # [xx] this case should be improved.. when will func_doc
            # have a known parse_repr??
            deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]),
                                    func_doc.parse_repr)
        else:
            deco_repr = UNKNOWN
        func_doc = apply_decorator(deco_name, func_doc)
        func_doc.canonical_name = UNKNOWN
        func_doc.parse_repr = deco_repr

    # Add a variable to the containing namespace.
    var_doc = VariableDoc(name=func_name, value=func_doc,
                          is_imported=False, is_alias=False,
                          docs_extracted_by='parser')
    set_variable(parent_doc, var_doc)

    # Return the new ValueDoc.
    return func_doc
1400   
def apply_decorator(decorator_name, func_doc):
    """
    Return the value doc that results from applying a decorator to
    C{func_doc}.

    C{staticmethod} and C{classmethod} produce a C{StaticMethodDoc} or
    C{ClassMethodDoc} built from the attributes of C{func_doc}.  Any
    other decorator is handled according to
    C{DEFAULT_DECORATOR_BEHAVIOR}: C{'transparent'} returns a copy of
    C{func_doc}, and C{'opaque'} returns an empty C{GenericValueDoc}.

    @param decorator_name: The decorator's name as a C{DottedName}.
        A value that matches neither known decorator (such as C{None})
        falls through to the default behavior.
    @param func_doc: The value doc being decorated.
    @return: A new value doc; C{func_doc} itself is not modified.
    @raise ValueError: If C{DEFAULT_DECORATOR_BEHAVIOR} is neither
        C{'transparent'} nor C{'opaque'}.
    """
    # [xx] what if func_doc is not a RoutineDoc?
    if decorator_name == DottedName('staticmethod'):
        return StaticMethodDoc(**func_doc.__dict__)
    elif decorator_name == DottedName('classmethod'):
        return ClassMethodDoc(**func_doc.__dict__)
    elif DEFAULT_DECORATOR_BEHAVIOR == 'transparent':
        return func_doc.__class__(**func_doc.__dict__) # make a copy.
    elif DEFAULT_DECORATOR_BEHAVIOR == 'opaque':
        return GenericValueDoc(docs_extracted_by='parser')
    else:
        # Call form of raise: accepted by every Python version, unlike
        # the legacy "raise Class, value" spelling.
        raise ValueError('Bad value for DEFAULT_DECORATOR_BEHAVIOR')
1413   
def init_arglist(func_doc, arglist):
    """
    Initialize the signature attributes of C{func_doc} (C{posargs},
    C{posarg_defaults}, C{vararg} and C{kwarg}) from a parsed
    argument list.

    @param func_doc: The routine doc whose signature should be set.
    @param arglist: The token tree for the argument list; it must be
        a list whose first element is the C{'('} operator.
    @raise ParseError: If the argument list is malformed.
    """
    if not (isinstance(arglist, list) and arglist[0] == (token.OP, '(')):
        raise ParseError("Bad argument list")

    # Start from an empty signature.
    func_doc.posargs = []
    func_doc.posarg_defaults = []
    func_doc.vararg = None
    func_doc.kwarg = None

    # One token list per argument (the enclosing parens are dropped).
    args = split_on(arglist[1:-1], (token.OP, ','))

    # Peel the "**kwarg" and then the "*vararg" argument off the end
    # of the list, in that order.
    star_args = (
        ((token.OP, '**'), 'kwarg',
         "Expected name after ** in argument list"),
        ((token.OP, '*'), 'vararg',
         "Expected name after * in argument list"),
    )
    for star_tok, attr, errmsg in star_args:
        if args and args[-1][0] == star_tok:
            if len(args[-1]) != 2 or args[-1][1][0] != token.NAME:
                raise ParseError(errmsg)
            setattr(func_doc, attr, args[-1][1][1])
            args.pop()

    # Everything left is a positional argument, optionally followed
    # by "= <default>".
    for arg in args:
        func_doc.posargs.append(parse_funcdef_arg(arg[0]))
        if len(arg) == 1:
            default = None
        elif arg[1] != (token.OP, '=') or len(arg) == 2:
            raise ParseError("Bad argument list")
        else:
            default = GenericValueDoc(parse_repr=pp_toktree(arg[2:]),
                                      docs_extracted_by='parser')
        func_doc.posarg_defaults.append(default)
1452   
1453  #///////////////////////////////////////////////////////////////// 
1454  # Line handler: class declarations 
1455  #///////////////////////////////////////////////////////////////// 
1456   
1457 -def process_classdef(line, parent_docs, prev_line_doc, lineno, 
1458                       comments, decorators, encoding): 
1459      """ 
1460      The line handler for class declaration lines, such as: 
1461       
1462          >>> class Foo(Bar, Baz): 
1463   
1464      This handler creates and initializes a new C{VariableDoc} 
1465      containing a C{ClassDoc}, adds the C{VariableDoc} to the 
1466      containing namespace, and returns the C{ClassDoc}. 
1467      """ 
1468      # Check syntax 
1469      if len(line)<3 or len(line)>4 or line[-1] != (token.OP, ':'): 
1470          raise ParseError("Bad class definition line") 
1471   
1472      # If we're not in a namespace, then ignore it. 
1473      parent_doc = parent_docs[-1] 
1474      if not isinstance(parent_doc, NamespaceDoc): return 
1475   
1476      # Get the class's name 
1477      class_name = parse_name(line[1]) 
1478      canonical_name = DottedName(parent_doc.canonical_name, class_name) 
1479   
1480      # Create the class's ClassDoc & VariableDoc. 
1481      class_doc = ClassDoc(variables={}, sort_spec=[], 
1482                           bases=[], subclasses=[], 
1483                           canonical_name=canonical_name, 
1484                           defining_module=parent_docs[0], 
1485                           docs_extracted_by='parser') 
1486      var_doc = VariableDoc(name=class_name, value=class_doc, 
1487                            is_imported=False, is_alias=False, 
1488                            docs_extracted_by='parser') 
1489   
1490      # Add the bases. 
1491      if len(line) == 4: 
1492          if (not isinstance(line[2], list) or 
1493              line[2][0] != (token.OP, '(')): 
1494              raise ParseError("Expected base list") 
1495          try: 
1496              for base_name in parse_classdef_bases(line[2]): 
1497                  class_doc.bases.append(find_base(base_name, parent_docs)) 
1498          except ParseError, e: 
1499              log.warning("Unable to extract the base list for %s: %s" % 
1500                          (canonical_name, e)) 
1501              class_doc.bases = UNKNOWN 
1502      else: 
1503          class_doc.bases = [] 
1504   
1505      # Register ourselves as a subclass to our bases. 
1506      if class_doc.bases is not UNKNOWN: 
1507          for basedoc in class_doc.bases: 
1508              if isinstance(basedoc, ClassDoc): 
1509                  basedoc.subclasses.append(class_doc) 
1510       
1511      # If the preceeding comment includes a docstring, then add it. 
1512      add_docstring_from_comments(class_doc, comments) 
1513       
1514      # Add the VariableDoc to our container. 
1515      set_variable(parent_doc, var_doc) 
1516   
1517      return class_doc 
1518   
def find_base(name, parent_docs):
    """
    Find the value doc for the base class with the given name.

    @param name: The base class's name, as written in the class
        definition.
    @type name: L{DottedName}
    @param parent_docs: The stack of enclosing API docs, used for
        name lookup.
    @return: The value of the variable that C{name} refers to.  If
        that variable has no known value but was imported, the result
        is either the docs parsed from its source (when
        C{BASE_HANDLING} is C{'parse'} and parsing succeeds) or a
        proxy C{ClassDoc} for the imported name.
    @raise ParseError: If no variable with the given name can be
        found, or if it has neither a value nor an import source.
    """
    assert isinstance(name, DottedName)

    # Find the variable containing the base.
    base_var = lookup_variable(name, parent_docs)
    if base_var is None:
        # If it looks like it's in an external module, then try
        # "importing" it.  lookup_name() returns None for an unknown
        # identifier, so check before reading its attributes.
        root_var = lookup_name(name[0], parent_docs)
        if (root_var is not None and
            root_var.imported_from not in (None, UNKNOWN)):
            _import_var(name, parent_docs)
            base_var = lookup_variable(name, parent_docs)
        # If we still don't have a var containing the base, give up.
        if base_var is None:
            raise ParseError("Could not find %s" % name)

    # If the variable has a value, return that value.
    if base_var.value != UNKNOWN:
        return base_var.value

    # Otherwise, if BASE_HANDLING is 'parse', try parsing the docs for
    # the base class; if that fails, or if BASE_HANDLING is 'link',
    # just make a proxy object.
    if base_var.imported_from not in (None, UNKNOWN):
        if BASE_HANDLING == 'parse':
            try:
                return parse_docs(name=base_var.imported_from)
            except ParseError:
                pass # fall back on the proxy below.
        # Either BASE_HANDLING='link' or parsing the base class failed;
        # return a proxy value for the base class.
        return ClassDoc(variables={}, sort_spec=[], bases=[],
                        subclasses=[], proxy_for=base_var.imported_from,
                        docs_extracted_by='parser')
    else:
        # No value available for the variable.  Say so, since callers
        # include this message in their warnings.
        raise ParseError("No value available for %s" % name)
1555                       
1556                       
1557       
1558   
1559  #///////////////////////////////////////////////////////////////// 
1560  #{ Parsing 
1561  #///////////////////////////////////////////////////////////////// 
1562   
def dotted_names_in(elt_list):
    """
    Return a list of all simple dotted names in the given
    expression.

    @param elt_list: A list of token lists.  An item that consists of
        a single bracketed group is expanded into its comma-separated
        members, which are then examined in the same way.  The list
        itself is left unmodified.
    @return: The L{DottedName}s that were found.  Items are examined
        from the end of the list towards the start, so the names are
        returned in that order.  Items that are not simple dotted
        names are skipped.
    """
    names = []
    # Work on a copy, so that the caller's list is not emptied.
    pending = list(elt_list)
    while pending:
        elt = pending.pop()
        if len(elt) == 1 and isinstance(elt[0], list):
            # Nested list: process the contents
            pending.extend(split_on(elt[0][1:-1], (token.OP, ',')))
        else:
            try:
                # Hand over a copy: the name parser may rewrite the
                # list it is given while stripping parentheses.
                names.append(parse_dotted_name(list(elt)))
            except ParseError:
                pass # complex expression -- ignore
    return names
1580   
def parse_name(elt, strip_parens=False):
    """
    Return the identifier held by a name token.

    @param elt: A token tree element: either a C{(type, text)} token
        or a list (a bracketed group).
    @param strip_parens: If true, then a name that is wrapped in any
        number of parentheses, such as C{((x))}, is unwrapped first.
    @return: The text of the name token.
    @raise ParseError: If C{elt} is not a name token.
    """
    if strip_parens:
        open_paren, close_paren = (token.OP, '('), (token.OP, ')')
        # Unwrap "( <elt> )" groups for as long as they keep nesting.
        while (isinstance(elt, list) and len(elt) == 3 and
               elt[0] == open_paren and elt[-1] == close_paren):
            elt = elt[1]

    if not isinstance(elt, list) and elt[0] == token.NAME:
        return elt[1]
    raise ParseError("Bad name")
1596   
def parse_dotted_name(elt_list, strip_parens=True):
    """
    Convert a list of token tree elements of the form
    C{name.name.name} into a L{DottedName}.  Parentheses around the
    leading part of the name, as in C{((x.y).z)}, and around the
    individual identifiers are stripped.

    @param elt_list: The token tree elements to parse.  The list is
        left unmodified.
    @param strip_parens: Accepted for interface compatibility; it is
        not consulted, and parentheses are always stripped.
    @return: The dotted name.
    @rtype: L{DottedName}
    @raise ParseError: If C{elt_list} is empty or does not consist of
        names separated by C{'.'} operators.
    @bug: does not handle 'x.(y).z'
    """
    if len(elt_list) == 0: raise ParseError("Bad dotted name")

    # Unwrapping parentheses rewrites the list, so do it on a copy
    # rather than on the caller's token tree.
    elts = list(elt_list)

    # Handle ((x.y).z).  (If the contents of the parens include
    # anything other than dotted names, such as (x,y), then we'll
    # catch it below and raise a ParseError.
    while (isinstance(elts[0], list) and
           len(elts[0]) >= 3 and
           elts[0][0] == (token.OP, '(') and
           elts[0][-1] == (token.OP, ')')):
        elts[:1] = elts[0][1:-1]

    # A dotted name alternates identifiers and dots, so it always has
    # an odd number of elements.
    if len(elts) % 2 != 1: raise ParseError("Bad dotted name")
    name = DottedName(parse_name(elts[0], True))
    for i in range(2, len(elts), 2):
        dot, identifier = elts[i-1], elts[i]
        if dot != (token.OP, '.'):
            raise ParseError("Bad dotted name")
        name = DottedName(name, parse_name(identifier, True))
    return name
1620           
def split_on(elt_list, split_tok):
    """
    Split a list of token tree elements into the runs of elements
    that lie between occurrences of C{split_tok}.

    @param elt_list: The elements to split.
    @param split_tok: The separator element; it is not included in
        the result.
    @return: A list of non-empty lists.  A separator at the very end
        of C{elt_list} does not produce a trailing empty list.
    @raise ParseError: If a separator is found at the start of
        C{elt_list}, or directly after another separator.
    """
    chunks = []
    current = []
    for elt in elt_list:
        if elt == split_tok:
            # A separator must close off a non-empty run.
            if not current:
                raise ParseError("Empty element from split")
            chunks.append(current)
            current = []
        else:
            current.append(elt)
    # Keep the final run unless it is empty (trailing separator, or
    # empty input).
    if current:
        chunks.append(current)
    return chunks
1632   
def parse_funcdef_arg(elt):
    """
    Convert the token tree element for a function definition argument
    into the argument's name(s).

    @param elt: Either a name token, or a parenthesized group of
        arguments (for tuple arguments).
    @return: The identifier string for a name token.  For a group, a
        list holding the result for each member; except that a group
        with a single member, such as C{(x)}, gives the result for
        that member directly.
    @raise ParseError: If C{elt} is neither a name token nor a
        parenthesized group.
    """
    # Plain token: it has to be an identifier.
    if not isinstance(elt, list):
        if elt[0] == token.NAME:
            return elt[1]
        raise ParseError("Bad argument -- expected name or tuple")

    # Bracketed group: only parentheses are acceptable.
    if elt[0] != (token.OP, '('):
        raise ParseError("Bad argument -- expected name or tuple")

    # "(x)" is just x.
    if len(elt) == 3:
        return parse_funcdef_arg(elt[1])

    # Tuple argument: convert each member, skipping the commas.
    members = []
    for member in elt[1:-1]:
        if member != (token.OP, ','):
            members.append(parse_funcdef_arg(member))
    return members
1655       
def parse_classdef_bases(elt):
    """
    Convert the token tree element for a class definition's base list
    into a list of L{DottedName}s, one per base.

    @param elt: The base list; it must be a list whose first element
        is the C{'('} operator.
    @raise ParseError: If C{elt} is not a parenthesized group, or if
        any of the bases is not a dotted name.

    @bug: Does not handle either of::
        - class A( (base.in.parens) ): pass
        - class B( (lambda:calculated.base)() ): pass
    """
    if not (isinstance(elt, list) and elt[0] == (token.OP, '(')):
        raise ParseError("Bad base list")

    # Drop the enclosing parens, and parse each comma-separated base.
    base_names = []
    for base_toks in split_on(elt[1:-1], (token.OP, ',')):
        base_names.append(parse_dotted_name(base_toks))
    return base_names
1672   
1673  # Used by: base list; 'del'; ... 
def parse_dotted_name_list(elt_list):
    """
    Convert a list of token tree elements that holds a
    comma-separated list of dotted names into a list of
    L{DottedName} objects.

    @param elt_list: The token tree elements to parse.  A trailing
        comma is accepted.
    @return: One L{DottedName} for each name in the list.
    @raise ParseError: If the elements are not names joined by
        C{'.'} and separated by C{','}, or if the list ends right
        after a C{'.'}.
    """
    # What the next element is allowed to be:
    WANT_NAME = 0       # the start of a new dotted name (or the end)
    WANT_SEPARATOR = 1  # a '.', a ',' (or the end)
    WANT_NAME_PART = 2  # the identifier that follows a '.'

    dot, comma = (token.OP, '.'), (token.OP, ',')
    found = []
    state = WANT_NAME
    for elt in elt_list:
        if state == WANT_SEPARATOR:
            if elt == dot:
                state = WANT_NAME_PART
            elif elt == comma:
                state = WANT_NAME
            else:
                raise ParseError("Expected '.' or ',' or end of list")
            continue

        # In both remaining states we need a name token.
        if not (isinstance(elt, tuple) and elt[0] == token.NAME):
            raise ParseError("Expected a name")
        if state == WANT_NAME:
            found.append(DottedName(elt[1]))
        else:
            # Extend the name that is currently being built.
            found[-1] = DottedName(found[-1], elt[1])
        state = WANT_SEPARATOR

    # A dangling '.' leaves the last name unfinished.
    if state == WANT_NAME_PART:
        raise ParseError("Expected a name")
    return found
1711   
def parse_string(elt_list):
    """
    Return the value of a string literal.

    @param elt_list: A list that must hold exactly one string token.
    @return: The string that the token's source text evaluates to.
    @raise ParseError: If C{elt_list} is anything other than a single
        string token.
    """
    if len(elt_list) != 1 or elt_list[0][0] != token.STRING:
        raise ParseError("Expected a string")

    # NOTE(review): eval() runs the token's source text.  That is
    # only safe as long as the text really is a string literal.
    # [xx] use something safer here?  But it needs to deal with
    # any string type (eg r"foo\bar" etc).
    return eval(elt_list[0][1])
1719   
1720  # ['1', 'b', 'c'] 
def parse_string_list(elt_list):
    """
    Convert a list of token tree elements that holds a
    comma-separated list of string literals into a list of strings.
    The literals may be enclosed in a single C{(...)} or C{[...]}
    group, or be given bare.

    @param elt_list: The token tree elements to parse.
    @return: The value of each string literal, in order.
    @raise ParseError: If any item of the list is not a single string
        literal.
    """
    # If the whole expression is one bracketed group, then look
    # inside it.  The test has to be made on the element itself: a
    # lone string token is a tuple, and must not be indexed as if it
    # were a group.
    if (len(elt_list) == 1 and isinstance(elt_list[0], list) and
        elt_list[0][0][1] in ('(', '[')):
        elt_list = elt_list[0][1:-1]

    string_list = []
    for string_elt in split_on(elt_list, (token.OP, ',')):
        string_list.append(parse_string(string_elt))

    return string_list
1731   
1732  #///////////////////////////////////////////////////////////////// 
1733  #{ Variable Manipulation 
1734  #///////////////////////////////////////////////////////////////// 
1735   
def set_variable(namespace, var_doc, preserve_docstring=False):
    """
    Add C{var_doc} to C{namespace}, replacing any variable that the
    namespace already has under the same name.

    @param namespace: The container to add the variable to.  If it
        is not a C{NamespaceDoc}, then nothing is done.
    @param var_doc: The variable to add.  Its C{container} must still
        be C{UNKNOWN}; it is set to C{namespace}.
    @param preserve_docstring: If true, and C{var_doc} has no
        docstring of its own, then it inherits the docstring (and
        docstring line number) of the variable that it replaces.
    """
    if not isinstance(namespace, NamespaceDoc):
        return

    var_name = var_doc.name
    if var_name in namespace.variables:
        # Retire the variable that is being replaced: drop it from the
        # ordering, and take the canonical name away from its value
        # unless it was only an alias for that value.
        namespace.sort_spec.remove(var_name)
        replaced = namespace.variables[var_name]
        if replaced.is_alias == False and replaced.value != UNKNOWN:
            replaced.value.canonical_name = UNKNOWN
        # Carry the old docstring over, if asked to and if needed.
        if (preserve_docstring and
            var_doc.docstring in (None, UNKNOWN) and
            replaced.docstring not in (None, UNKNOWN)):
            var_doc.docstring = replaced.docstring
            var_doc.docstring_lineno = replaced.docstring_lineno

    # Register the new variable; it goes to the end of the ordering.
    namespace.variables[var_name] = var_doc
    namespace.sort_spec.append(var_name)
    assert var_doc.container is UNKNOWN
    var_doc.container = namespace
1764   
def del_variable(namespace, name):
    """
    Remove the variable with the given (possibly dotted) name from
    C{namespace}.  Nothing is done if the name cannot be followed all
    the way down through nested namespaces.

    @param namespace: The container in which to start the lookup.
    @param name: The name of the variable to delete.  It is indexed,
        sliced and measured with C{len()} (presumably a
        L{DottedName}).
    """
    # Walk down one namespace for each leading piece of the name.
    while isinstance(namespace, NamespaceDoc):
        head = name[0]
        if head not in namespace.variables:
            return

        if len(name) == 1:
            # This is the variable itself: unregister it, and take
            # the canonical name away from its value unless it was
            # only an alias for that value.
            var_doc = namespace.variables[head]
            namespace.sort_spec.remove(head)
            del namespace.variables[head]
            if not var_doc.is_alias and var_doc.value is not UNKNOWN:
                var_doc.value.canonical_name = UNKNOWN
            return

        namespace = namespace.variables[head].value
        name = name[1:]
1778               
1779  #///////////////////////////////////////////////////////////////// 
1780  #{ Name Lookup 
1781  #///////////////////////////////////////////////////////////////// 
1782   
def lookup_name(identifier, parent_docs):
    """
    Find and return the documentation for the variable named by
    the given identifier.

    @param identifier: The (undotted) name to look up.
    @type identifier: C{string}
    @param parent_docs: The stack of enclosing API docs.  The last
        entry is searched as the local namespace, and the first entry
        as the global namespace.
    @rtype: L{VariableDoc} or C{None}
    @raise TypeError: If C{identifier} is not a string.
    """
    # We need to check 3 namespaces: locals, globals, and builtins.
    # Note that this is true even if we're in a version of python with
    # nested scopes, because nested scope lookup does not apply to
    # nested class definitions, and we're not worried about variables
    # in nested functions.
    if not isinstance(identifier, basestring):
        raise TypeError('identifier must be a string')

    # Locals, then globals (aka the containing module).
    for scope in (parent_docs[-1], parent_docs[0]):
        if (isinstance(scope, NamespaceDoc) and
            identifier in scope.variables):
            return scope.variables[identifier]

    # Builtins.  These are only introspected if the name was not
    # found in either of the scopes above.
    builtins = epydoc.docintrospecter.introspect_docs(__builtin__)
    if (isinstance(builtins, NamespaceDoc) and
        identifier in builtins.variables):
        return builtins.variables[identifier]

    # We didn't find it; return None.
    return None
1816   
def lookup_variable(dotted_name, parent_docs):
    """
    Find and return the documentation for the variable with the
    given dotted name.

    @type dotted_name: L{DottedName}
    @param parent_docs: The stack of enclosing API docs, used for
        name lookup.
    @return: The variable's documentation, or C{None} if it could not
        be found.
    """
    assert isinstance(dotted_name, DottedName)

    # A bare identifier is resolved through the usual scope lookup.
    if len(dotted_name) == 1:
        return lookup_name(dotted_name[0], parent_docs)

    # Otherwise resolve everything up to the last dot to a value; the
    # variable must then be a member of that value.
    owner = lookup_value(dotted_name[:-1], parent_docs)
    if not isinstance(owner, NamespaceDoc):
        return None # var not found.
    if dotted_name[-1] not in owner.variables:
        return None # var not found.
    return owner.variables[dotted_name[-1]]
1833   
def lookup_value(dotted_name, parent_docs):
    """
    Find and return the documentation for the value held by the
    variable with the given dotted name.

    @type dotted_name: L{DottedName}
    @param parent_docs: The stack of enclosing API docs, used to look
        up the first piece of the name.
    @return: The value's documentation.  If the path leads through a
        variable whose value is unknown but which was imported, then
        a proxy C{GenericValueDoc} for the corresponding name in the
        import source is returned instead.  If the name cannot be
        resolved, C{None} is returned.
    """
    assert isinstance(dotted_name, DottedName)
    var_doc = lookup_name(dotted_name[0], parent_docs)

    # Step through the remaining pieces of the name, one namespace
    # at a time.
    for i in range(1, len(dotted_name)):
        if var_doc is None:
            return None

        value = var_doc.value
        if not isinstance(value, NamespaceDoc):
            if (value == UNKNOWN and
                var_doc.imported_from not in (None, UNKNOWN)):
                # We can't look inside an imported value that we know
                # nothing about; point at its source instead.
                # [xx] do I want to create a proxy here??
                return GenericValueDoc(
                    proxy_for=var_doc.imported_from + dotted_name[i:],
                    parse_repr=str(dotted_name),
                    docs_extracted_by='parser')
            return None

        var_doc = value.variables.get(dotted_name[i])

    if var_doc is None:
        return None
    return var_doc.value
1861   
1862  #///////////////////////////////////////////////////////////////// 
1863  #{ Docstring Comments 
1864  #///////////////////////////////////////////////////////////////// 
1865   
def add_docstring_from_comments(api_doc, comments):
    """
    Use the text of a comment docstring as the docstring of
    C{api_doc}.

    @param api_doc: The object to document.  If it is C{None}, then
        nothing is done.
    @param comments: A sequence of C{(text, lineno)} pairs, one for
        each line of the comment.  If it is empty, then nothing is
        done.
    """
    if api_doc is not None and comments:
        text_lines = []
        for text, _lineno in comments:
            text_lines.append(text)
        api_doc.docstring = '\n'.join(text_lines)
        # The docstring is located at the comment's first line.
        api_doc.docstring_lineno = comments[0][1]
1870   
1871  #///////////////////////////////////////////////////////////////// 
1872  #{ Tree tokens 
1873  #///////////////////////////////////////////////////////////////// 
1874   
def pp_toktree(elts, spacing='normal', indent=0):
    """
    Convert a token tree back into source text.

    @param elts: The token tree: a sequence whose items are either
        C{(type, text)} tokens or nested sequences (sub-trees).
    @param spacing: C{'tight'} suppresses the space between a
        sub-tree and preceding text that ends in one of C{+-*/=,}.
        No other value is given any special meaning.
    @param indent: The indentation level (in units of four spaces)
        in effect at the start of C{elts}.
    @return: The text for the token tree.
    @rtype: C{unicode}
    """
    pad = '    '
    text = u''
    for elt in elts:
        # Put a blank line before class & def statements.
        if elt == (token.NAME, 'class') or elt == (token.NAME, 'def'):
            text += '\n%s' % (pad*indent)

        if not isinstance(elt, tuple):
            # Sub-tree: render it, then decide whether a space is
            # needed between what we have so far (left side) and the
            # rendered sub-tree (right side).
            sub = pp_toktree(elt, spacing, indent)
            if (sub == '' or text == '' or
                text in ('-', '`') or sub in ('}', ']', ')', '`', ':') or
                sub[0] in ('.', ',') or
                text[-1] in ('(', '[', '{', '.', '\n', ' ') or
                (sub[0] == '(' and text[-1] not in (',', '='))):
                glue = ''
            # Note: "and" binds tighter than "or", so the test on the
            # sub-tree's first character applies whatever the value
            # of spacing is.
            elif (spacing == 'tight' and
                  text[-1] in '+-*/=,' or sub[0] in '+-*/=,'):
                glue = ''
            else:
                glue = ' '
            text = '%s%s%s' % (text, glue, sub)
            continue

        # Plain token: tokens are appended without any spacing, apart
        # from the layout tokens, which manage the indentation.
        kind = elt[0]
        if kind == token.NEWLINE:
            text += pad + elt[1]
            text += '\n%s' % (pad*indent)
        elif kind == token.INDENT:
            text += pad
            indent += 1
        elif kind == token.DEDENT:
            # Take back the indentation written after the last
            # newline.
            assert text[-4:] == pad
            text = text[:-4]
            indent -= 1
        elif kind == tokenize.COMMENT:
            text += elt[1].rstrip() + '\n' + pad*indent
        else:
            text += elt[1]
    return text
1911           
1912  #///////////////////////////////////////////////////////////////// 
1913  #{ Helper Functions 
1914  #///////////////////////////////////////////////////////////////// 
1915   
def get_module_encoding(filename):
    """
    Return the name of the source encoding declared by a Python
    source file.

    Only the first two lines of the file are examined.  If the file
    starts with the UTF-8 byte order mark, the result is C{'utf-8'};
    otherwise it is the encoding named by the first C{coding:} or
    C{coding=} declaration found on those lines, or C{'iso-8859-1'}
    if there is none.

    @param filename: The path of the source file.
    @rtype: C{string}
    @see: U{PEP 263<http://www.python.org/peps/pep-0263.html>}
    """
    module_file = open(filename, 'rU')
    try:
        lines = [module_file.readline() for i in range(2)]
        if lines[0].startswith('\xef\xbb\xbf'):
            return 'utf-8'
        else:
            for line in lines:
                # Raw string, so that the regexp escapes do not rely
                # on unrecognized string escapes being passed through.
                m = re.search(r"coding[:=]\s*([-\w.]+)", line)
                if m: return m.group(1)

        # Fall back on Python's default encoding.
        return 'iso-8859-1' # aka 'latin-1'
    finally:
        module_file.close()
1934           
def _get_module_name(filename, package_doc):
    """
    Work out the canonical name of the module that is defined by a
    source file.

    @param filename: The path of the module's file.  For a file named
        C{__init__}, the name of the containing directory is used.
    @param package_doc: The API doc for the package that contains the
        module, or C{None} for a top-level module.
    @return: A tuple C{(dotted_name, is_package)}.  If the package
        already has a variable with the module's name, whose value
        was not imported from the module itself, then a C{"'"} is
        appended to the canonical name to tell the two apart.
    """
    # Strip a ".py", ".pyc", ".pyo", ... extension.  The dot must be
    # escaped: otherwise any name that merely ends in "py" (such as a
    # script called "happy") would lose its last three characters.
    name = re.sub(r'\.py\w?$', '', os.path.split(filename)[1])
    if name == '__init__':
        is_package = True
        name = os.path.split(os.path.split(filename)[0])[1]
    else:
        is_package = False

    # [XX] if the module contains a script, then `name` may not
    # necessarily be a valid identifier -- which will cause
    # DottedName to raise an exception.  Is that what I want?
    if package_doc is None:
        dotted_name = DottedName(name)
    else:
        dotted_name = DottedName(package_doc.canonical_name, name)

    # Check if the module looks like it's shadowed by a variable.
    # If so, then add a "'" to the end of its canonical name, to
    # distinguish it from the variable.
    if package_doc is not None and name in package_doc.variables:
        valdoc = package_doc.variables[name].value
        if (valdoc not in (None, UNKNOWN) and
            valdoc.imported_from != dotted_name):
            log.warning("Module %s might be shadowed by a variable with "
                        "the same name." % dotted_name)
            dotted_name = DottedName(str(dotted_name)+"'")

    return dotted_name, is_package
1966   
def flatten(lst, out=None):
    """
    @return: a flat list containing the leaves of the given nested
        list, in the order in which they appear.
    @param lst: The nested list that should be flattened.  Both
        lists and tuples count as nesting; anything else is a leaf.
    @param out: If given, then the leaves are appended to this list
        (which is also returned), instead of to a new list.
    """
    if out is None: out = []
    # Depth-first walk: the top of the stack is the sequence that is
    # currently being scanned.
    stack = [iter(lst)]
    while stack:
        for elt in stack[-1]:
            if isinstance(elt, (list, tuple)):
                # Descend; the scan of the parent resumes afterwards.
                stack.append(iter(elt))
                break
            out.append(elt)
        else:
            # This sequence is exhausted; go back up.
            stack.pop()
    return out
1980
Source Code for Module epydoc.docparser