Package epydoc :: Module docparser
[hide private]
[frames | no frames]

Source Code for Module epydoc.docparser

   1  # epydoc -- Source code parsing 
   2  # 
   3  # Copyright (C) 2005 Edward Loper 
   4  # Author: Edward Loper <edloper@loper.org> 
   5  # URL: <http://epydoc.sf.net> 
   6  # 
   7  # $Id: docparser.py 1168 2006-04-05 16:52:56Z edloper $ 
   8   
   9  """ 
  10  Extract API documentation about python objects by parsing their source 
  11  code. 
  12   
  13  L{DocParser} is a processing class that reads the Python source code 
  14  for one or more modules, and uses it to create L{APIDoc} objects 
  15  containing the API documentation for the variables and values defined 
  16  in those modules. 
  17   
  18  C{DocParser} can be subclassed to extend the set of source code 
  19  constructions that it supports. 
  20  """ 
  21  __docformat__ = 'epytext en' 
  22   
  23  ###################################################################### 
  24  ## Imports 
  25  ###################################################################### 
  26   
  27  # Python source code parsing: 
  28  import token, tokenize 
  29  # Finding modules: 
  30  import imp 
  31  # File services: 
  32  import os, os.path 
  33  # Unicode: 
  34  import codecs 
  35  # API documentation encoding: 
  36  from epydoc.apidoc import * 
  37  # For looking up the docs of builtins: 
  38  import __builtin__, exceptions 
  39  import epydoc.docintrospecter  
  40  # Misc utility functions: 
  41  from epydoc.util import * 
  42  # Backwards compatibility 
  43  from epydoc.compat import * 
  44   
  45  ###################################################################### 
  46  ## Doc Parser 
  47  ###################################################################### 
  48   
class ParseError(Exception):
    """
    Raised when C{docparser} comes across Python code that is not
    syntactically valid while it is processing a Python source file.
    """

_moduledoc_cache = {}
"""A cache of C{ModuleDoc}s that we've already created.
C{_moduledoc_cache} is a dictionary mapping from filenames to
C{ModuleDoc} objects.
@type: C{dict}"""

# [xx] outdated:
"""
An API documentation extractor based on source code parsing.
C{DocParser} reads and parses the Python source code for one or
more modules, and uses it to create L{APIDoc} objects containing
the API documentation for the variables and values defined in
those modules.  The main interface method is L{parse()}, which
returns the documentation for an object with a given dotted name,
or a module with a given filename.

Currently, C{DocParser} extracts documentation from the following
source code constructions:

  - module docstring
  - import statements
  - class definition blocks
  - function definition blocks
  - assignment statements
    - simple assignment statements
    - assignment statements with multiple C{'='}s
    - assignment statements with unpacked left-hand sides
    - assignment statements that wrap a function in classmethod
      or staticmethod.
    - assignment to special variables __path__, __all__, and
      __docformat__.
  - delete statements

C{DocParser} does not yet support the following source code
constructions:

  - assignment statements that create properties

By default, C{DocParser} will explore the contents of top-level
C{try} and C{if} blocks.  If desired, C{DocParser} can also
be told to explore the contents of C{while} and C{for} blocks.

Subclassing
===========
C{DocParser} can be subclassed, to extend the set of source code
constructions that it supports.  C{DocParser} can be extended in
several different ways:

  - [XX] fill this in!

"""

#////////////////////////////////////////////////////////////
# Configuration Constants
#////////////////////////////////////////////////////////////

#{ Configuration Constants: Control Flow
PARSE_TRY_BLOCKS = True
"""Should the contents of C{try} blocks be examined?"""
PARSE_EXCEPT_BLOCKS = True
"""Should the contents of C{except} blocks be examined?"""
PARSE_FINALLY_BLOCKS = True
"""Should the contents of C{finally} blocks be examined?"""
PARSE_IF_BLOCKS = True
"""Should the contents of C{if} blocks be examined?"""
PARSE_ELSE_BLOCKS = True
"""Should the contents of C{else} and C{elif} blocks be examined?"""
PARSE_WHILE_BLOCKS = False
"""Should the contents of C{while} blocks be examined?"""
PARSE_FOR_BLOCKS = False
"""Should the contents of C{for} blocks be examined?"""

#{ Configuration Constants: Imports
IMPORT_HANDLING = 'link'
"""What should C{docparser} do when it encounters an import
statement?
  - C{'link'}: Create variabledoc objects with imported_from pointers
    to the source object.
  - C{'parse'}: Parse the imported file, to find the actual
    documentation for the imported object.  (This will fall back
    to the 'link' behavior if the imported file can't be parsed,
    e.g., if it's a builtin.)
"""

IMPORT_STAR_HANDLING = 'parse'
"""When C{docparser} encounters a C{'from M{m} import *'}
statement, and is unable to parse C{M{m}} (either because
L{IMPORT_HANDLING}=C{'link'}, or because parsing failed), how
should it determine the list of identifiers exported by C{M{m}}?
  - C{'ignore'}: ignore the import statement, and don't create
    any new variables.
  - C{'parse'}: parse it to find a list of the identifiers that it
    exports.  (This will fall back to the 'ignore' behavior if the
    imported file can't be parsed, e.g., if it's a builtin.)
  - C{'introspect'}: import the module and introspect it (using C{dir})
    to find a list of the identifiers that it exports.  (This will
    fall back to the 'ignore' behavior if the module can't be
    imported.)
"""

DEFAULT_DECORATOR_BEHAVIOR = 'opaque'
"""When C{DocParser} encounters an unknown decorator, what should
it do to the documentation of the decorated function?
  - C{'transparent'}: leave the function's documentation as-is.
  - C{'opaque'}: replace the function's documentation with an
    empty C{ValueDoc} object, reflecting the fact that we have no
    knowledge about what value the decorator returns.
"""

BASE_HANDLING = 'link'
"""What should C{docparser} do when it encounters a base class that
was imported from another module?
  - C{'link'}: Create a valuedoc with a C{proxy_for} pointer to the
    base class.
  - C{'parse'}: Parse the file containing the base class, to find
    the actual documentation for it.  (This will fall back to the
    'link' behavior if the imported file can't be parsed, e.g., if
    it's a builtin.)
"""

#{ Configuration Constants: Comment docstrings
COMMENT_DOCSTRING_MARKER = '#: '
"""The prefix used to mark comments that contain attribute
docstrings for variables."""

#{ Configuration Constants: Grouping
START_GROUP_MARKER = '#{'
"""The prefix used to mark a comment that starts a group.  This marker
should be followed (on the same line) by the name of the group.
Following a start-group comment, all variables defined at the same
indentation level will be assigned to this group name, until the
parser reaches the end of the file, a matching end-group comment, or
another start-group comment at the same indentation level.
"""

END_GROUP_MARKER = '#}'
"""The prefix used to mark a comment that ends a group.  See
L{START_GROUP_MARKER}."""

#/////////////////////////////////////////////////////////////////
#{ Module parser
#/////////////////////////////////////////////////////////////////

199 -def parse_docs(filename=None, name=None, context=None, is_script=False):
200 """ 201 Generate the API documentation for a specified object by 202 parsing Python source files, and return it as a L{ValueDoc}. 203 The object to generate documentation for may be specified 204 using the C{filename} parameter I{or} the C{name} parameter. 205 (It is an error to specify both a filename and a name; or to 206 specify neither a filename nor a name). 207 208 @param filename: The name of the file that contains the python 209 source code for a package, module, or script. If 210 C{filename} is specified, then C{parse} will return a 211 C{ModuleDoc} describing its contents. 212 @param name: The fully-qualified python dotted name of any 213 value (including packages, modules, classes, and 214 functions). C{DocParser} will automatically figure out 215 which module(s) it needs to parse in order to find the 216 documentation for the specified object. 217 @param context: The API documentation for the package that 218 contains C{filename}. If no context is given, then 219 C{filename} is assumed to contain a top-level module or 220 package. It is an error to specify a C{context} if the 221 C{name} argument is used. 222 @rtype: L{ValueDoc} 223 """ 224 # Always introspect __builtins__ & exceptions (e.g., in case 225 # they're used as base classes.) 226 epydoc.docintrospecter.introspect_docs(__builtin__) 227 epydoc.docintrospecter.introspect_docs(exceptions) 228 229 # If our input is a python object name, then delegate to 230 # _find(). 231 if filename is None and name is not None: 232 if context: 233 raise ValueError("context should only be specified together " 234 "with filename, not with name.") 235 name = DottedName(name) 236 val_doc = _find(name) 237 if val_doc.canonical_name == UNKNOWN: 238 val_doc.canonical_name = name 239 return val_doc 240 241 # If our input is a filename, then create a ModuleDoc for it, 242 # and use process_file() to populate its attributes. 
243 elif filename is not None and name is None: 244 # Use a python source version, if possible. 245 if not is_script: 246 try: filename = py_src_filename(filename) 247 except ValueError, e: raise ImportError('%s' % e) 248 249 # Check the cache, first. 250 if _moduledoc_cache.has_key(filename): 251 return _moduledoc_cache[filename] 252 253 log.info("Parsing %s" % filename) 254 255 # If the context wasn't provided, then check if the file is in 256 # a package directory. If so, then update basedir & name to 257 # contain the topmost package's directory and the fully 258 # qualified name for this file. (This update assume the 259 # default value of __path__ for the parent packages; if the 260 # parent packages override their __path__s, then this can 261 # cause us not to find the value.) 262 if context is None and not is_script: 263 basedir = os.path.split(filename)[0] 264 name = os.path.splitext(os.path.split(filename)[1])[0] 265 if name == '__init__': 266 basedir, name = os.path.split(basedir) 267 context = _parse_package(basedir) 268 269 # Figure out the canonical name of the module we're parsing. 270 if not is_script: 271 module_name, is_pkg = _get_module_name(filename, context) 272 else: 273 module_name = DottedName(munge_script_name(filename)) 274 is_pkg = False 275 276 # Create a new ModuleDoc for the module, & add it to the cache. 277 module_doc = ModuleDoc(canonical_name=module_name, variables={}, 278 sort_spec=[], imports=[], 279 filename=filename, package=context, 280 is_package=is_pkg, submodules=[], 281 docs_extracted_by='parser') 282 module_doc.defining_module = module_doc 283 _moduledoc_cache[filename] = module_doc 284 285 # Set the module's __path__ to its default value. 286 if is_pkg: 287 module_doc.path = [os.path.split(module_doc.filename)[0]] 288 289 # Add this module to the parent package's list of submodules. 290 if context is not None: 291 context.submodules.append(module_doc) 292 293 # Tokenize & process the contents of the module's source file. 
294 try: 295 process_file(module_doc) 296 except tokenize.TokenError, e: 297 msg, (srow, scol) = e.args 298 raise ParseError('Error during parsing: %s ' 299 '(%s, line %d, char %d)' % 300 (msg, module_doc.filename, srow, scol)) 301 302 # Handle any special variables (__path__, __docformat__, etc.) 303 handle_special_module_vars(module_doc) 304 305 # Return the completed ModuleDoc 306 return module_doc 307 else: 308 raise ValueError("Expected exactly one of the following " 309 "arguments: name, filename")
310
def _parse_package(package_dir):
    """
    Parse the C{__init__} file of C{package_dir} (after first parsing
    the C{__init__} files of every ancestor package) and return the
    resulting C{ModuleDoc}.

    @return: The package's C{ModuleDoc}, or C{None} if
        C{package_dir} is not a package directory.
    """
    if is_package_dir(package_dir):
        # Ancestors first, so the parent doc can serve as context.
        enclosing_doc = _parse_package(os.path.split(package_dir)[0])
        init_file = os.path.join(package_dir, '__init__')
        return parse_docs(filename=init_file, context=enclosing_doc)
    return None
323 324 # Special vars: 325 # C{__docformat__}, C{__all__}, and C{__path__}.
def handle_special_module_vars(module_doc):
    """
    Interpret the special module variables C{__docformat__},
    C{__all__} and C{__path__} of C{module_doc}, and then remove
    each one that was found from C{module_doc.variables}.

      - C{__docformat__} is stored in C{module_doc.docformat}.
      - C{__all__} decides the C{is_public} flag of every variable.
      - C{__path__} (packages only) is stored in C{module_doc.path}.
    """
    variables = module_doc.variables

    # If __docformat__ is defined, parse its value.
    docformat_toks = _module_var_toktree(module_doc, '__docformat__')
    if docformat_toks is not None:
        try:
            module_doc.docformat = parse_string(docformat_toks)
        except:
            # Best effort: an unparseable value is silently ignored.
            pass
        del variables['__docformat__']

    # If __all__ is defined, parse its value.
    all_toks = _module_var_toktree(module_doc, '__all__')
    if all_toks is not None:
        try:
            exported = set(parse_string_list(all_toks))
            for var_name, var_doc in variables.items():
                listed = var_name in exported
                var_doc.is_public = listed
                if listed and not isinstance(var_doc, ModuleDoc):
                    var_doc.is_imported = False
        except ParseError:
            # If we couldn't parse the list, give precedence to
            # introspection.
            for var_doc in variables.values():
                if not isinstance(var_doc, ModuleDoc):
                    var_doc.is_imported = UNKNOWN
        del variables['__all__']

    # If __path__ is defined, then extract its value (pkgs only)
    if module_doc.is_package:
        path_toks = _module_var_toktree(module_doc, '__path__')
        if path_toks is not None:
            try:
                module_doc.path = parse_string_list(path_toks)
            except ParseError:
                pass # [xx]
            del variables['__path__']
362
def _module_var_toktree(module_doc, name):
    """
    Return the token tree of the value assigned to the variable
    C{name} in C{module_doc}.

    @return: The token tree, or C{None} if the variable does not
        exist, has no known value, or its value has no known token
        tree.
    """
    var_doc = module_doc.variables.get(name)
    if var_doc is None:
        return None

    value = var_doc.value
    if value in (None, UNKNOWN):
        return None

    toktree = value.toktree
    if toktree is UNKNOWN:
        return None
    return toktree
370 371 #//////////////////////////////////////////////////////////// 372 #{ Module Lookup 373 #//////////////////////////////////////////////////////////// 374
def _find(name, package_doc=None):
    """
    Return the API documentation for the object whose name is
    C{name}.  C{package_doc}, if specified, is the API
    documentation for the package containing the named object.

    @param name: The (dotted) name to look up; it is indexed and
        sliced one identifier at a time.
    @param package_doc: The C{ModuleDoc} of the enclosing package,
        or C{None} to search the default module path.
    @raise ImportError: If the value can not be found.
    """
    # If we're inside a package, then find the package's path.  The
    # package's `path` attribute is filled in when the package is
    # parsed (default directory, or the value of its __path__
    # variable); fall back to the directory holding the package file
    # when it is not available.
    if package_doc is None:
        path = None
    else:
        path = getattr(package_doc, 'path', UNKNOWN)
        if path in (None, UNKNOWN):
            path = [os.path.split(package_doc.filename)[0]]

    # The leftmost identifier in `name` should be a module or
    # package on the given path; find it and parse it.
    filename = _get_filename(name[0], path)
    module_doc = parse_docs(filename, context=package_doc)

    # If the name just has one identifier, then the module we just
    # parsed is the object we're looking for; return it.
    if len(name) == 1: return module_doc

    # Otherwise, we're looking for something inside the module.
    # First, check to see if it's in a variable (but ignore
    # variables that just contain imported submodules).
    if not _is_submodule_import_var(module_doc, name[1]):
        try: return _find_in_namespace(name[1:], module_doc)
        except ImportError: pass

    # If not, then check to see if it's in a subpackage.
    if module_doc.is_package:
        return _find(name[1:], module_doc)

    # If it's not in a variable or a subpackage, then we can't
    # find it.
    raise ImportError('Could not find value')
415
def _is_submodule_import_var(module_doc, var_name):
    """
    Tell whether C{var_name} names a variable of C{module_doc} whose
    C{imported_from} link points at a submodule with that same name
    (i.e., a variable that exists only because a package imported
    one of its own submodules).

    @return: C{False} if there is no such variable; otherwise the
        result of comparing the variable's C{imported_from} with
        the dotted name C{<module>.<var_name>}.
    """
    candidate = module_doc.variables.get(var_name)
    submodule_name = DottedName(module_doc.canonical_name, var_name)
    if candidate is None:
        return False
    return candidate.imported_from == submodule_name
427
def _find_in_namespace(name, namespace_doc):
    """
    Resolve C{name} (a sequence of identifiers) relative to
    C{namespace_doc} and return the documentation of the value it
    refers to.

    @raise ImportError: If any step of the lookup fails.
    """
    first = name[0]
    if first not in namespace_doc.variables:
        raise ImportError('Could not find value')

    # Look up the variable in the namespace.
    var_doc = namespace_doc.variables[first]
    val_doc = var_doc.value
    if val_doc is UNKNOWN:
        raise ImportError('Could not find value')

    # If the variable's value was imported, then follow its
    # alias link.
    if var_doc.imported_from not in (None, UNKNOWN):
        return _find(var_doc.imported_from+name[1:])

    # A single identifier means this value is the one we want.
    if len(name) == 1:
        return val_doc

    # More identifiers remain: descend if the value is a namespace.
    if isinstance(val_doc, NamespaceDoc):
        return _find_in_namespace(name[1:], val_doc)

    # Otherwise, we ran into a dead end.
    raise ImportError('Could not find value')
455
def _get_filename(identifier, path=None):
    """
    Locate the Python source file for the module or package called
    C{identifier}, using C{imp.find_module}.

    @param identifier: The (undotted) name of the module or package.
    @param path: The list of directories to search; C{None} or
        C{UNKNOWN} means that C{imp.find_module} is called with
        C{None}.
    @return: The filename of the module's source file; for a
        package, the filename of its C{__init__.py} (or
        C{__init__.pyw}) file.
    @raise ImportError: If the module can not be found, or if no
        Python source file exists for it.
    """
    if path == UNKNOWN: path = None
    try:
        fp, filename, (s,m,typ) = imp.find_module(identifier, path)
        # We only want the location, not the open file.
        if fp is not None: fp.close()
    except ImportError:
        raise ImportError('No Python source file found.')

    if typ == imp.PY_SOURCE:
        return filename

    elif typ == imp.PY_COMPILED:
        # See if we can find a corresponding non-compiled version:
        # swap the compiled-file extension for '.py'.
        filename = os.path.splitext(filename)[0] + '.py'
        if not os.path.exists(filename):
            raise ImportError('No Python source file found.')
        return filename

    elif typ == imp.PKG_DIRECTORY:
        # `filename` is the package directory; try each possible
        # init file *inside that directory*.
        for init_name in ('__init__.py', '__init__.pyw'):
            init_file = os.path.join(filename, init_name)
            if os.path.exists(init_file):
                return init_file
        raise ImportError('No package file found.')

    elif typ == imp.C_BUILTIN:
        raise ImportError('No Python source file for builtin modules.')

    elif typ == imp.C_EXTENSION:
        raise ImportError('No Python source file for c extensions.')

    else:
        raise ImportError('No Python source file found.')
485 486 #///////////////////////////////////////////////////////////////// 487 #{ File tokenization loop 488 #///////////////////////////////////////////////////////////////// 489
490 -def process_file(module_doc):
491 """ 492 Read the given C{ModuleDoc}'s file, and add variables 493 corresponding to any objects defined in that file. In 494 particular, read and tokenize C{module_doc.filename}, and 495 process each logical line using L{process_line()}. 496 """ 497 # Keep track of the current line number: 498 lineno = None 499 500 # Use this list to collect the tokens on a single logical line: 501 line_toks = [] 502 503 # This list contains one APIDoc for each indentation level. 504 # The first element is the APIDoc for the module, and each 505 # subsequent element is the APIDoc for the object at that 506 # indentation level. The final element of the list is the 507 # C{APIDoc} for the entity that we're currently processing. 508 parent_docs = [module_doc] 509 510 # The APIDoc for the object that was defined by the previous 511 # line, if any; or None otherwise. This is used to update 512 # parent_docs when we encounter an indent; and to decide what 513 # object (if any) is described by a docstring. 514 prev_line_doc = module_doc 515 516 # A list of comments that occur before or on the current 517 # logical line, used to build the comment docstring. Each 518 # element is a tuple (comment_text, comment_lineno). 519 comments = [] 520 521 # A list of decorator lines that occur before the current 522 # logical line. This is used so we can process a function 523 # declaration line and its decorators all at once. 524 decorators = [] 525 526 # A list of group names, one for each indentation level. This is 527 # used to keep track groups that are defined by comment markers 528 # START_GROUP_MARKER and END_GROUP_MARKER. 529 groups = [None] 530 531 # When we encounter a comment start group marker, set this to the 532 # name of the group; but wait until we're ready to process the 533 # next line before we actually set groups[-1] to this value. 
This 534 # is necessary because at the top of a block, the tokenizer gives 535 # us comments before the INDENT token; but if we encounter a group 536 # start marker at the top of a block, then we want it to apply 537 # inside that block, not outside it. 538 start_group = None 539 540 # Check if the source file declares an encoding. 541 encoding = get_module_encoding(module_doc.filename) 542 543 # The token-eating loop: 544 try: 545 module_file = codecs.open(module_doc.filename, 'rU', encoding) 546 except LookupError: 547 log.warning("Unknown encoding %r for %s; using the default" 548 "encoding instead (iso-8859-1)" % 549 (encoding, module_doc.filename)) 550 encoding = 'iso-8859-1' 551 module_file = codecs.open(module_doc.filename, 'rU', encoding) 552 tok_iter = tokenize.generate_tokens(module_file.readline) 553 for toktype, toktext, (srow,scol), (erow,ecol), line_str in tok_iter: 554 # BOM encoding marker: ignore. 555 if toktype == token.ERRORTOKEN: 556 log.debug(type(toktext), `toktext`) 557 if (toktype == token.ERRORTOKEN and 558 (toktext == u'\ufeff' or 559 toktext.encode(encoding) == '\xef\xbb\xbf')): 560 pass 561 562 # Error token: abort 563 elif toktype == token.ERRORTOKEN: 564 raise ParseError('Error during parsing: invalid syntax ' 565 '(%s, line %d, char %d: %r)' % 566 (module_doc.filename, srow, scol, toktext)) 567 568 # Indent token: update the parent_doc stack. 569 elif toktype == token.INDENT: 570 if prev_line_doc is None: 571 parent_docs.append(parent_docs[-1]) 572 else: 573 parent_docs.append(prev_line_doc) 574 groups.append(None) 575 576 # Dedent token: update the parent_doc stack. 577 elif toktype == token.DEDENT: 578 if line_toks == []: 579 parent_docs.pop() 580 groups.pop() 581 else: 582 # This *should* only happen if the file ends on an 583 # indented line, with no final newline. 584 # (otherwise, this is the wrong thing to do.) 
585 pass 586 587 # Line-internal newline token: if we're still at the start of 588 # the logical line, and we've seen one or more comment lines, 589 # then discard them: blank lines are not allowed between a 590 # comment block and the thing it describes. 591 elif toktype == tokenize.NL: 592 if comments and not line_toks: 593 log.warning('Ignoring docstring comment block followed by ' 594 'a blank line in %r on line %r' % 595 (module_doc.filename, srow-1)) 596 comments = [] 597 598 # Comment token: add to comments if appropriate. 599 elif toktype == tokenize.COMMENT: 600 if toktext.startswith(COMMENT_DOCSTRING_MARKER): 601 comment_line = toktext[len(COMMENT_DOCSTRING_MARKER):].rstrip() 602 comments.append( [comment_line, srow]) 603 elif toktext.startswith(START_GROUP_MARKER): 604 start_group = toktext[len(START_GROUP_MARKER):].strip() 605 elif toktext.startswith(END_GROUP_MARKER): 606 for i in range(len(groups)-1, -1, -1): 607 if groups[i]: 608 groups[i] = None 609 break 610 else: 611 log.warning("Got group end marker without a corresponding " 612 "start marker in %r on line %r" % 613 (module_doc.filename, srow)) 614 615 # Normal token: Add it to line_toks. (If it's a non-unicode 616 # string literal, then we need to re-encode using the file's 617 # encoding, to get back to the original 8-bit data; and then 618 # convert that string with 8-bit data to a 7-bit ascii 619 # representation.) 620 elif toktype != token.NEWLINE and toktype != token.ENDMARKER: 621 if lineno is None: lineno = srow 622 if toktype == token.STRING: 623 str_prefixes = re.match('[^\'"]*', toktext).group() 624 if 'u' not in str_prefixes: 625 s = toktext.encode(encoding) 626 toktext = decode_with_backslashreplace(s) 627 line_toks.append( (toktype, toktext) ) 628 629 # Decorator line: add it to the decorators list. 630 elif line_toks and line_toks[0] == (token.OP, '@'): 631 decorators.append(shallow_parse(line_toks)) 632 line_toks = [] 633 634 # End of line token, but nothing to do. 
635 elif line_toks == []: 636 pass 637 638 # End of line token: parse the logical line & process it. 639 else: 640 if start_group: 641 groups[-1] = start_group 642 start_group = None 643 644 if parent_docs[-1] != 'skip_block': 645 try: 646 prev_line_doc = process_line( 647 shallow_parse(line_toks), parent_docs, prev_line_doc, 648 lineno, comments, decorators, encoding) 649 except ParseError, e: 650 raise ParseError('Error during parsing: invalid ' 651 'syntax (%s, line %d) -- %s' % 652 (module_doc.filename, lineno, e)) 653 654 # grouping... 655 if groups[-1] and prev_line_doc not in (None, 'skip_block'): 656 if isinstance(prev_line_doc, VariableDoc): 657 # This special case is needed for inst vars, where 658 # parent_docs[-1] is the __init__ function, not the 659 # containing class: 660 add_to_group(prev_line_doc.container, 661 prev_line_doc, groups[-1]) 662 elif isinstance(parent_docs[-1], NamespaceDoc): 663 add_to_group(parent_docs[-1], prev_line_doc, 664 groups[-1]) 665 else: 666 prev_line_doc = None 667 668 # Reset line contents. 669 line_toks = [] 670 lineno = None 671 comments = []
672
def add_to_group(container, api_doc, group_name):
    """
    Record in C{container.group_specs} that C{api_doc} belongs to
    the group called C{group_name}.

    C{group_specs} is kept as a list of C{(group_name, names)}
    pairs.  The name of C{api_doc} is appended to the first pair
    whose group name matches; if there is none, a new pair is added
    at the end of the list.
    """
    if container.group_specs == UNKNOWN:
        container.group_specs = []

    # Variables are listed under their own name; any other doc under
    # the last identifier of its canonical name.
    if isinstance(api_doc, VariableDoc):
        member = api_doc.name
    else:
        member = api_doc.canonical_name[-1]

    for spec_name, members in container.group_specs:
        if spec_name == group_name:
            members.append(member)
            return

    # No group of that name exists yet.
    container.group_specs.append( (group_name, [member]) )
688 689 #///////////////////////////////////////////////////////////////// 690 #{ Shallow parser 691 #///////////////////////////////////////////////////////////////// 692
def shallow_parse(line_toks):
    """
    Turn a flat list of C{(toktype, toktext)} tokens into a nested
    list (a X{token tree}).  The leaves of the tree are the original
    tokens, in their original order; every bracketed group -- C{()},
    C{[]} or C{{}} -- becomes a sub-list that starts with its opening
    bracket token and ends with its closing bracket token.

    @raise ParseError: If the brackets are mismatched or unbalanced.
    """
    opener_for = {')': '(', ']': '[', '}': '{'}
    openers = opener_for.values()

    # open_groups[0] is the top-level tree; each later element is a
    # bracketed group that has been opened but not yet closed.
    open_groups = [[]]
    for tok in line_toks:
        text = tok[1]
        if text in openers:
            open_groups.append([tok])
        elif text in opener_for:
            if len(open_groups) == 1:
                raise ParseError('Unbalanced parens')
            group = open_groups.pop()
            if group[0][1] != opener_for[text]:
                raise ParseError('Mismatched parens')
            group.append(tok)
            open_groups[-1].append(group)
        else:
            open_groups[-1].append(tok)

    if len(open_groups) != 1:
        raise ParseError('Unbalanced parens')
    return open_groups[0]
725 726 #///////////////////////////////////////////////////////////////// 727 #{ Line processing 728 #///////////////////////////////////////////////////////////////// 729 # The methods process_*() are used to handle lines. 730
def process_line(line, parent_docs, prev_line_doc, lineno,
                 comments, decorators, encoding):
    """
    Dispatch one logical line (given as a token tree) to the line
    handler that matches its form, passing all arguments through
    unchanged.

    @return: Whatever the selected handler returns; C{None} for an
        empty line or for a line that no handler applies to.
    """
    if not line: # blank line.
        return None

    args = (line, parent_docs, prev_line_doc, lineno,
            comments, decorators, encoding)
    first = line[0]

    # A colon anywhere but at the end, or a semicolon, means the
    # line holds more than one statement.
    if (token.OP, ':') in line[:-1]:
        return process_one_line_block(*args)
    if (token.OP, ';') in line:
        return process_multi_stmt(*args)

    # Statements recognized by their first token.
    if first in ((token.NAME, 'def'), (token.OP, '@')):
        return process_funcdef(*args)
    if first == (token.NAME, 'class'):
        return process_classdef(*args)
    if first == (token.NAME, 'import'):
        return process_import(*args)
    if first == (token.NAME, 'from'):
        return process_from_import(*args)
    if first == (token.NAME, 'del'):
        return process_del(*args)

    # A line consisting of a single string literal is a docstring.
    if len(line) == 1 and first[0] == token.STRING:
        return process_docstring(*args)
    if (token.OP, '=') in line:
        return process_assignment(*args)
    if first[0] == token.NAME and first[1] in CONTROL_FLOW_KEYWORDS:
        return process_control_flow_line(*args)
    return None
766 # [xx] do something with control structures like for/if? 767 768 #///////////////////////////////////////////////////////////////// 769 # Line handler: control flow 770 #///////////////////////////////////////////////////////////////// 771 772 CONTROL_FLOW_KEYWORDS = [ 773 #: A list of the control flow keywords. If a line begins with 774 #: one of these keywords, then it should be handled by 775 #: C{process_control_flow_line}. 776 'if', 'elif', 'else', 'while', 'for', 'try', 'except', 'finally'] 777
def process_control_flow_line(line, parent_docs, prev_line_doc,
                              lineno, comments, decorators, encoding):
    """
    Handle a line that starts with a control flow keyword, and
    decide whether the block it introduces should be examined.

    For a C{for} line (when L{PARSE_FOR_BLOCKS} is true), a variable
    is also created for the loop variable.

    @return: C{None} if the block should be processed in the context
        we were already in; C{'skip_block'} if its contents should
        be ignored.
    """
    keyword = line[0][1]

    # Which configuration flag governs which keyword.
    examine_block = {
        'if': PARSE_IF_BLOCKS,
        'elif': PARSE_ELSE_BLOCKS,
        'else': PARSE_ELSE_BLOCKS,
        'while': PARSE_WHILE_BLOCKS,
        'for': PARSE_FOR_BLOCKS,
        'try': PARSE_TRY_BLOCKS,
        'except': PARSE_EXCEPT_BLOCKS,
        'finally': PARSE_FINALLY_BLOCKS,
    }

    # If it's a 'for' block: create the loop variable.
    if keyword == 'for' and PARSE_FOR_BLOCKS:
        target_toks = split_on(line[1:], (token.NAME, 'in'))[0]
        loopvar_name = parse_dotted_name(target_toks)
        parent = get_lhs_parent(loopvar_name, parent_docs)
        if parent is not None:
            loopvar_doc = VariableDoc(name=loopvar_name[-1], is_alias=False,
                                      is_imported=False, is_instvar=False,
                                      docs_extracted_by='parser')
            set_variable(parent, loopvar_doc)

    if examine_block.get(keyword):
        # Process the block using the context that we were already in.
        return None
    # Ignore the contents of this block.
    return 'skip_block'
808 809 #///////////////////////////////////////////////////////////////// 810 # Line handler: imports 811 #///////////////////////////////////////////////////////////////// 812 # [xx] I could optionally add ValueDoc's for the imported 813 # variables with proxy_for set to the imported source; but 814 # I don't think I gain much of anything by doing so. 815
def process_import(line, parent_docs, prev_line_doc, lineno,
                   comments, decorators, encoding):
    """
    Handle a statement of the form C{import a.b, c.d as e}: register
    an imported variable in the current namespace for each
    comma-separated clause.  Nothing is done unless
    C{parent_docs[-1]} is a C{NamespaceDoc}.

    @raise ParseError: If an C{as} clause is malformed.
    """
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    for clause in split_on(line[1:], (token.OP, ',')):
        pieces = split_on(clause, (token.NAME, 'as'))

        # >>> import a.b
        if len(pieces) == 1:
            _import_var(parse_dotted_name(pieces[0]), parent_docs)
            continue

        if len(pieces) != 2:
            raise ParseError('Multiple "as" tokens in import')

        # >>> import a.b as c
        source_toks, alias_toks = pieces
        if len(alias_toks) != 1:
            raise ParseError('Expected identifier after "as"')
        src_name = parse_dotted_name(source_toks)
        var_name = parse_name(alias_toks[0])
        _import_var_as(src_name, var_name, parent_docs)
835
def process_from_import(line, parent_docs, prev_line_doc, lineno,
                        comments, decorators, encoding):
    """
    Handle a statement of the form C{from a.b import c, d} or
    C{from a.b import *}.  C{from __future__ import ...} statements
    are ignored.  Nothing is done unless C{parent_docs[-1]} is a
    C{NamespaceDoc}.

    @raise ParseError: If the statement does not consist of a
        non-empty module part and a non-empty name part separated by
        a single C{import} keyword.
    """
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    pieces = split_on(line[1:], (token.NAME, 'import'))
    if len(pieces) != 2 or not pieces[0] or not pieces[1]:
        raise ParseError("Bad from-import")
    module_toks, imported_toks = pieces

    # The imported names might be parenthesized, as specified by
    # PEP 328 (http://www.python.org/peps/pep-0328.html); if so,
    # strip the enclosing parentheses.
    if len(imported_toks) == 1 and isinstance(imported_toks[0], list):
        group = imported_toks[0]
        if group[0] == (token.OP, '(') and group[-1] == (token.OP, ')'):
            imported_toks = group[1:-1]

    # >>> from __future__ import nested_scopes
    if module_toks == [(token.NAME, '__future__')]:
        return

    src_name = parse_dotted_name(module_toks)

    # >>> from sys import *
    if imported_toks == [(token.OP, '*')]:
        _process_fromstar_import(src_name, parent_docs)
        return

    # >>> from os.path import join, split
    for elt in imported_toks:
        if elt == (token.OP, ','):
            continue
        var_name = parse_name(elt)
        _import_var_as(DottedName(src_name, var_name),
                       var_name, parent_docs)
868
def _process_fromstar_import(src, parent_docs):
    """
    Handle a statement of the form:

        >>> from SRC import *

    If L{IMPORT_HANDLING} or L{IMPORT_STAR_HANDLING} is C{'parse'},
    then first try to find the docs for module C{src} with C{_find};
    if that yields a C{ModuleDoc}, add an imported variable to
    C{parent_docs[-1]} for each of its public, non-C{__dunder__}
    variables (copying the value as well when L{IMPORT_HANDLING} is
    C{'parse'}), and stop.

    Otherwise, if L{IMPORT_STAR_HANDLING} is C{'introspect'}, import
    C{src} and add an imported variable for each name in its
    C{__all__} (or, lacking that, each name from C{dir()} that does
    not start with an underscore).

    @param src: The dotted name of the module being imported from.
    @param parent_docs: The stack of enclosing docs; the import is
        recorded on C{parent_docs[0].imports}.
    @return: C{None}
    """
    # Record the import
    parent_docs[0].imports.append(src) # mark that it's .*??

    if not isinstance(parent_docs[-1], NamespaceDoc): return

    # If src is package-local, then convert it to a global name.
    src = _global_name(src, parent_docs)

    # [xx] add check for if we already have the source docs in our
    # cache??

    if (IMPORT_HANDLING == 'parse' or
        IMPORT_STAR_HANDLING == 'parse'): # [xx] is this ok?
        try: module_doc = _find(src)
        except ImportError: module_doc = None
        if isinstance(module_doc, ModuleDoc):
            for name, imp_var in module_doc.variables.items():
                # [xx] this is not exactly correct, but close.  It
                # does the wrong thing if a __var__ is explicitly
                # listed in __all__.
                if (imp_var.is_public and
                    not (name.startswith('__') and name.endswith('__'))):
                    var_doc = _add_import_var(DottedName(src, name), name,
                                              parent_docs[-1])
                    if IMPORT_HANDLING == 'parse':
                        var_doc.value = imp_var.value
            # Parsing succeeded, so we are done.  Falling through to
            # the introspection fallback would replace the variables
            # just created with value-less ones.
            return

    # If we got here, then either IMPORT_HANDLING='link' or we
    # failed to parse the `src` module.
    if IMPORT_STAR_HANDLING == 'introspect':
        # Importing an arbitrary module can fail in arbitrary ways,
        # so this is deliberately a catch-all: on any failure we just
        # give up on listing the exports.
        try: module = __import__(str(src), {}, {}, [0])
        except: return # We couldn't import it.
        if module is None: return # We couldn't import it.
        if hasattr(module, '__all__'):
            names = list(module.__all__)
        else:
            names = [n for n in dir(module) if not n.startswith('_')]
        for name in names:
            _add_import_var(DottedName(src, name), name, parent_docs[-1])
924
def _import_var(name, parent_docs):
    """
    Handle a statement of the form:

        >>> import NAME

    The import is always recorded on C{parent_docs[0].imports}.  If
    L{IMPORT_HANDLING} is C{'parse'} and C{_find} can locate the
    imported value, then a variable for the top-level module
    (C{name[0]}) is added to C{parent_docs[-1]}, with the docs found
    for that module as its value.

    Otherwise, a chain of variables is created: for C{'import a.b'},
    a variable C{'a'} holding a proxy C{ModuleDoc} is added to
    C{parent_docs[-1]}, and a variable C{'b'} is added inside that
    proxy module.

    @param name: The dotted name being imported.
    @param parent_docs: The stack of enclosing docs.
    @return: C{None}
    """
    # Record the import
    parent_docs[0].imports.append(name)

    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # If name is package-local, then convert it to a global name;
    # src_prefix is whatever _global_name() put in front of it.
    src = _global_name(name, parent_docs)
    src_prefix = src[:len(src)-len(name)]

    # [xx] add check for if we already have the source docs in our
    # cache??

    if IMPORT_HANDLING == 'parse':
        # Check to make sure that we can actually find the value.
        try:
            found = _find(src)
        except ImportError:
            found = None
        if found is not None:
            # What gets bound by the import statement is not the
            # value itself but the top-level module containing it.
            top_name = name[0]
            top_var = _add_import_var(src_prefix+top_name, top_name,
                                      namespace)
            top_var.value = _find(DottedName(top_name))
            return

    # Either IMPORT_HANDLING='link', or parsing did not find the
    # value's docs: fall back on proxy modules and a variable with
    # an UNKNOWN value.
    container = namespace
    for depth, identifier in enumerate(name[:-1]):
        known = container.variables
        # NOTE(review): this tests the object stored in `variables`
        # itself, while the line below reads `.value` from the same
        # entry -- confirm whether the check was meant to be applied
        # to `.value`.
        have_module = (identifier in known and
                       isinstance(known[identifier], ModuleDoc))
        if not have_module:
            partial = name[:depth+1]
            proxy_var = _add_import_var(partial, identifier, container)
            proxy_var.value = ModuleDoc(variables={}, sort_spec=[],
                                        proxy_for=src_prefix+partial,
                                        submodules={},
                                        docs_extracted_by='parser')
        container = container.variables[identifier].value

    # Add the variable to the innermost container.
    _add_import_var(src, name[-1], container)
982
def _import_var_as(src, name, parent_docs):
    """
    Handle a statement of the form:

        >>> import SRC as NAME

    The import is always recorded on C{parent_docs[0].imports}.  If
    L{IMPORT_HANDLING} is C{'parse'} and C{_find} can locate C{src},
    then a variable called C{name} holding the docs that were found
    is added to C{parent_docs[-1]}.  Otherwise a variable without a
    value, but with C{imported_from} pointing at the imported object,
    is added instead.

    @param src: The dotted name of the imported object.
    @param name: The local name that the object is bound to.
    @param parent_docs: The stack of enclosing docs.
    @return: C{None}
    """
    # Record the import
    parent_docs[0].imports.append(src)

    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # If src is package-local, then convert it to a global name.
    src = _global_name(src, parent_docs)

    found = None
    if IMPORT_HANDLING == 'parse':
        try:
            found = _find(src)
        except ImportError:
            found = None

    if found is None:
        # Either IMPORT_HANDLING='link', or we could not find the
        # value's docs by parsing: add a value-less variable.
        _add_import_var(src, name, namespace)
        return

    # We found the value by parsing; create a variable for it.
    imported = VariableDoc(name=name, value=found,
                           is_imported=True, is_alias=False,
                           imported_from=src,
                           docs_extracted_by='parser')
    set_variable(namespace, imported)
1019
def _add_import_var(src, name, container):
    """
    Create a C{VariableDoc} called C{name} that is marked as imported
    from C{src}, store it in C{container} with L{set_variable}, and
    return it.

    @param src: The name recorded as the variable's C{imported_from}.
    @param name: The name of the new variable.
    @param container: The namespace that receives the variable.
    @return: The new C{VariableDoc}.
    """
    imported = VariableDoc(name=name,
                           is_imported=True,
                           is_alias=False,
                           imported_from=src,
                           docs_extracted_by='parser')
    set_variable(container, imported)
    return imported
1029
def _global_name(name, parent_docs):
    """
    If the given name is package-local (relative to the current
    context, as determined by C{parent_docs}), then convert it to a
    global name.

    Starting from the package of C{parent_docs[0]} (or
    C{parent_docs[0]} itself when it is a package) and walking
    outwards, the first package whose C{path} contains a module
    called C{name[0]} is used as the prefix.

    @param name: The dotted name to resolve.
    @param parent_docs: The stack of enclosing docs.
    @return: The package's canonical name plus C{name}, or C{name}
        itself when no enclosing package contains C{name[0]}.
    """
    # Get the containing package from parent_docs.
    module_doc = parent_docs[0]
    if module_doc.is_package:
        candidate = module_doc
    else:
        candidate = module_doc.package

    # Check each package, from closest to furthest.
    while candidate not in (None, UNKNOWN):
        try:
            handle = imp.find_module(name[0], candidate.path)[0]
            # find_module() may hand back an open file; we only
            # wanted to know whether the module exists.
            if handle is not None:
                handle.close()
        except ImportError:
            # No submodule found here; try the next package up.
            candidate = candidate.package
        else:
            # A submodule was found; `name` is relative to it.
            return candidate.canonical_name + name

    # No enclosing package contains `name`; return it as-is.
    return name
1059 1060 #///////////////////////////////////////////////////////////////// 1061 # Line handler: assignment 1062 #///////////////////////////////////////////////////////////////// 1063
def process_assignment(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for assignment statements, such as:

        >>> x = y = 12

    The line is split on C{=} tokens; the last piece is the right
    hand side and every other piece is an assignment target.  A
    C{VariableDoc} is created for each target that is a simple dotted
    name.  For any other target (e.g. a tuple), a value-less
    C{VariableDoc} is created for each dotted name found in it.

    @param line: The token tree for the statement.
    @param parent_docs: The stack of enclosing docs.
    @param comments: The comments preceding the line (may be C{None});
        used as a comment docstring when there is a single target.
    @return: The new C{VariableDoc} if there is exactly one target and
        it is either a single identifier or an instance variable;
        otherwise C{None}.
    """
    # Divide the assignment statement into its pieces.
    pieces = split_on(line, (token.OP, '='))

    lhs_pieces = pieces[:-1]
    rhs = pieces[-1]

    # Decide whether the variable is an instance variable or not.
    # If it's an instance var, then discard the value.
    is_instvar = lhs_is_instvar(lhs_pieces, parent_docs)

    # if it's not an instance var, and we're not in a namespace,
    # then it's just a local var -- so ignore it.
    if not (is_instvar or isinstance(parent_docs[-1], NamespaceDoc)):
        return None

    # Evaluate the right hand side.
    if not is_instvar:
        rhs_val, is_alias = rhs_to_valuedoc(rhs, parent_docs)
    else:
        rhs_val, is_alias = UNKNOWN, False

    # Assign the right hand side value to each left hand side.
    # (Do the rightmost assignment first)
    lhs_pieces.reverse()
    for lhs in lhs_pieces:
        # Try treating the LHS as a simple dotted name.  Any failure
        # at all just means "not a simple dotted name", so this is
        # deliberately a catch-all.
        try: lhs_name = parse_dotted_name(lhs)
        except: lhs_name = None
        if lhs_name is not None:
            lhs_parent = get_lhs_parent(lhs_name, parent_docs)
            if lhs_parent is None: continue
            # Create the VariableDoc.
            var_doc = VariableDoc(name=lhs_name[-1], value=rhs_val,
                                  is_imported=False, is_alias=is_alias,
                                  is_instvar=is_instvar,
                                  docs_extracted_by='parser')
            # Extract a docstring from the comments, when present,
            # but only if there's a single LHS.
            if len(lhs_pieces) == 1:
                add_docstring_from_comments(var_doc, comments)

            # Assign the variable to the containing namespace,
            # *unless* the variable is an instance variable
            # without a comment docstring.  In that case, we'll
            # only want to add it if we later discover that it's
            # followed by a variable docstring.  If it is, then
            # process_docstring will take care of adding it to the
            # containing class.  (This is a little hackish, but
            # unfortunately is necessary because we won't know if
            # this assignment line is followed by a docstring
            # until later.)
            if (not is_instvar) or comments:
                set_variable(lhs_parent, var_doc, True)

            # If it's the only var, then return the VarDoc for use
            # as the new `prev_line_doc`.
            if (len(lhs_pieces) == 1 and
                (len(lhs_name) == 1 or is_instvar)):
                return var_doc

        # Otherwise, the LHS must be a complex expression; use
        # dotted_names_in() to decide what variables it contains,
        # and create VariableDoc's for all of them (with UNKNOWN
        # value).
        else:
            # Only look at *this* target, split into its
            # comma-separated elements.  dotted_names_in() consumes
            # the list it is given, so handing it `lhs_pieces` would
            # empty the list we are iterating over and would
            # re-process the other targets of a chained assignment.
            try:
                targets = split_on(lhs, (token.OP, ','))
            except ParseError:
                targets = []
            for lhs_name in dotted_names_in(targets):
                lhs_parent = get_lhs_parent(lhs_name, parent_docs)
                if lhs_parent is None: continue
                var_doc = VariableDoc(name=lhs_name[-1],
                                      is_imported=False,
                                      is_alias=is_alias,
                                      is_instvar=is_instvar,
                                      docs_extracted_by='parser')
                set_variable(lhs_parent, var_doc, True)

        # If we have multiple left-hand-sides, then all but the
        # rightmost one are considered aliases.
        is_alias = True
1144 1145
def lhs_is_instvar(lhs_pieces, parent_docs):
    """
    Decide whether an assignment's left hand side names an instance
    variable.

    That is the case when the innermost parent doc is a C{RoutineDoc}
    that is directly nested in a C{ClassDoc}, and the only target has
    the form C{SELF.NAME}, where C{SELF} is the routine's first
    positional argument and C{NAME} is a simple name.

    @param lhs_pieces: The assignment targets (a list of token lists).
    @param parent_docs: The stack of enclosing docs.
    @rtype: C{bool}
    """
    routine = parent_docs[-1]
    if not isinstance(routine, RoutineDoc):
        return False

    # There must be exactly one target, and a first argument to
    # compare it against.
    posargs = routine.posargs
    if len(lhs_pieces) != 1 or len(posargs) == 0:
        return False

    # The target must consist of exactly: <self> . <name>
    target = lhs_pieces[0]
    if len(target) != 3:
        return False
    if target[0] != (token.NAME, posargs[0]):
        return False
    if target[1] != (token.OP, '.'):
        return False
    if target[2][0] != token.NAME:
        return False

    # Make sure we're in an instance method, and not a module-level
    # function: walking outwards, a class must come before anything
    # that differs from the routine itself.
    for enclosing in parent_docs[::-1]:
        if isinstance(enclosing, ClassDoc):
            return True
        elif enclosing != routine:
            return False
    return False
1167
def rhs_to_valuedoc(rhs, parent_docs):
    """
    Build a value doc for the right hand side of an assignment.

    Three cases are tried, in order: a dotted name that
    L{lookup_value} can resolve (an alias); a call of the form
    C{NAME(...)} whose argument resolves to a C{RoutineDoc} (treated
    as a decorator application); anything else (a C{GenericValueDoc}
    whose C{parse_repr} is the source text).

    @param rhs: The token list for the right hand side.
    @param parent_docs: The stack of enclosing docs.
    @return: A pair C{(value_doc, is_alias)}.
    """
    # Dotted variable:
    try:
        dotted = parse_dotted_name(rhs)
        aliased = lookup_value(dotted, parent_docs)
    except ParseError:
        aliased = None
    if aliased is not None:
        return aliased, True

    # Decorators:
    looks_like_call = (len(rhs) == 2 and rhs[0][0] == token.NAME and
                       isinstance(rhs[1], list))
    if looks_like_call:
        # rhs[1] is the bracketed argument list; strip the brackets.
        arg_val, _ = rhs_to_valuedoc(rhs[1][1:-1], parent_docs)
        if isinstance(arg_val, RoutineDoc):
            decorated = apply_decorator(DottedName(rhs[0][1]), arg_val)
            decorated.canonical_name = UNKNOWN
            decorated.parse_repr = pp_toktree(rhs)
            return decorated, False

    # Nothing else to do: make a val with the source as its repr.
    generic = GenericValueDoc(parse_repr=pp_toktree(rhs), toktree=rhs,
                              defining_module=parent_docs[0],
                              docs_extracted_by='parser')
    return generic, False
1192
def get_lhs_parent(lhs_name, parent_docs):
    """
    Find the doc that should contain the variable named by an
    assignment target.

    @param lhs_name: The target's name.
    @type lhs_name: L{DottedName}
    @param parent_docs: The stack of enclosing docs.
    @return: Inside a routine, the nearest enclosing C{ClassDoc}; for
        a single identifier, C{parent_docs[-1]}; otherwise whatever
        L{lookup_value} returns for the name's container.
    @raise ValueError: If the innermost parent is a C{RoutineDoc}
        that is not enclosed by any C{ClassDoc}.
    """
    assert isinstance(lhs_name, DottedName)

    innermost = parent_docs[-1]

    # For instance vars inside an __init__ method:
    if isinstance(innermost, RoutineDoc):
        for enclosing in parent_docs[::-1]:
            if isinstance(enclosing, ClassDoc):
                return enclosing
        raise ValueError("%r is not a namespace or method" %
                         parent_docs[-1])

    # For local variables:
    if len(lhs_name) == 1:
        return innermost

    # For non-local variables:
    return lookup_value(lhs_name.container(), parent_docs)
1211 1212 #///////////////////////////////////////////////////////////////// 1213 # Line handler: single-line blocks 1214 #///////////////////////////////////////////////////////////////// 1215
def process_one_line_block(line, parent_docs, prev_line_doc, lineno,
                           comments, decorators, encoding):
    """
    The line handler for single-line blocks, such as:

        >>> def f(x): return x*2

    This handler calls L{process_line} twice: once for the tokens up
    to and including the first colon, and once for the remaining
    tokens, with the doc produced by the first call pushed onto the
    parent stack.  The comment docstring and the decorators are
    applied to the first part only.

    @return: Whatever L{process_line} returned for the first part
        (the block header).
    """
    colon_pos = line.index((token.OP, ':'))
    header = line[:colon_pos+1]
    body = line[colon_pos+1:]

    header_doc = process_line(header, parent_docs, prev_line_doc,
                              lineno, comments, decorators, encoding)
    # The body is processed for its side effects only.
    process_line(body, parent_docs+[header_doc],
                 header_doc, lineno, None, [], encoding)
    return header_doc
1235 1236 #///////////////////////////////////////////////////////////////// 1237 # Line handler: semicolon-separated statements 1238 #///////////////////////////////////////////////////////////////// 1239
def process_multi_stmt(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for semicolon-separated statements, such as:

        >>> x=1; y=2; z=3

    This handler calls L{process_line} once for each statement.  Each
    statement sees the doc returned for the statement before it as
    its C{prev_line_doc}.  The decorators are passed to the first
    statement only, and the comment docstring is not passed on to any
    of the sub-statements.

    @return: C{None}
    """
    last_doc = prev_line_doc
    pending_decorators = decorators
    for stmt in split_on(line, (token.OP, ';')):
        if not stmt:
            continue
        last_doc = process_line(stmt, parent_docs, last_doc,
                                lineno, None, pending_decorators,
                                encoding)
        # Decorators only apply to the first statement.
        pending_decorators = []
    return None
1259 1260 #///////////////////////////////////////////////////////////////// 1261 # Line handler: delete statements 1262 #///////////////////////////////////////////////////////////////// 1263
def process_del(line, parent_docs, prev_line_doc, lineno,
                comments, decorators, encoding):
    """
    The line handler for delete statements, such as:

        >>> del x, y.z

    This handler calls L{del_variable} on the innermost parent doc
    for each dotted name that L{dotted_names_in} finds in the
    variable list.  The variable list may be nested.  Complex
    expressions in the variable list (such as C{x[3]}) are ignored.

    @param line: The token tree for the statement.  C{line[0]} is
        skipped (presumably the C{del} keyword).
    @return: C{None}
    """
    # If we're not in a namespace, then ignore it.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    targets = split_on(line[1:], (token.OP, ','))
    for dotted in dotted_names_in(targets):
        del_variable(namespace, dotted)

    return None
1285 1286 #///////////////////////////////////////////////////////////////// 1287 # Line handler: docstrings 1288 #///////////////////////////////////////////////////////////////// 1289
def process_docstring(line, parent_docs, prev_line_doc, lineno,
                      comments, decorators, encoding):
    """
    The line handler for bare string literals.  If C{prev_line_doc}
    is not C{None}, then the string literal becomes that C{APIDoc}'s
    docstring.  If it already has a docstring (from comment
    docstrings), then a warning is logged and the old docstring is
    replaced by the new one.

    @param line: The token tree for the string literal.
    @param prev_line_doc: The doc produced by the preceding line, or
        C{None}.
    @param lineno: Stored as the docstring's line number.
    @param encoding: The encoding used to decode a C{str} docstring.
    @return: C{None}
    """
    if prev_line_doc is None:
        return
    text = parse_string(line)

    # If the docstring is a str, then convert it to unicode.
    # According to a strict reading of PEP 263, this might not be the
    # right thing to do; but it will almost always be what the
    # module's author intended.
    if isinstance(text, str):
        text = text.decode(encoding)

    # An instance variable is only added to its class once we know
    # that it has a docstring; process_assignment() leaves it out
    # when there is no comment docstring, so it is added here, to the
    # nearest enclosing class.
    needs_registration = (isinstance(prev_line_doc, VariableDoc) and
                          prev_line_doc.is_instvar and
                          prev_line_doc.docstring in (None, UNKNOWN))
    if needs_registration:
        for enclosing in parent_docs[::-1]:
            if isinstance(enclosing, ClassDoc):
                set_variable(enclosing, prev_line_doc, True)
                break

    if prev_line_doc.docstring not in (None, UNKNOWN):
        log.warning("%s has both a comment-docstring and a normal "
                    "(string) docstring; ignoring the comment-"
                    "docstring." % prev_line_doc.canonical_name)

    prev_line_doc.docstring = text
    prev_line_doc.docstring_lineno = lineno
1331 1332 1333 #///////////////////////////////////////////////////////////////// 1334 # Line handler: function declarations 1335 #///////////////////////////////////////////////////////////////// 1336
def process_funcdef(line, parent_docs, prev_line_doc, lineno,
                    comments, decorators, encoding):
    """
    The line handler for function declaration lines, such as:

        >>> def f(a, b=22, (c,d)):

    This handler creates and initializes a new C{RoutineDoc}, applies
    the decorators to it (innermost first), wraps the result in a
    C{VariableDoc} that is added to the containing namespace, and
    returns the resulting value doc.

    @param line: The token tree for the line; it must consist of
        exactly four elements, the last one being a colon.
    @param decorators: The token lists of the decorators preceding
        the definition.  The list is reversed in place.
    @raise ParseError: If the line is not a well formed function
        definition.
    @return: The value doc for the function, or C{None} if the
        innermost parent is not a C{NamespaceDoc}.
    """
    # Check syntax.
    if len(line) != 4 or line[3] != (token.OP, ':'):
        raise ParseError("Bad function definition line")

    # If we're not in a namespace, then ignore it.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # Create the function's RoutineDoc.
    func_name = parse_name(line[1])
    func_doc = RoutineDoc(
        canonical_name=DottedName(namespace.canonical_name, func_name),
        defining_module=parent_docs[0],
        lineno=lineno, docs_extracted_by='parser')

    # Process the signature.
    init_arglist(func_doc, line[2])

    # If the preceding comment includes a docstring, then add it.
    add_docstring_from_comments(func_doc, comments)

    # Apply any decorators, starting with the one closest to the def.
    decorators.reverse()
    for decorator in decorators:
        # decorator[0] is skipped (presumably the '@' token).
        try:
            deco_name = parse_dotted_name(decorator[1:])
        except ParseError:
            deco_name = None

        # Work out how to describe the function being wrapped.
        have_repr = True
        if func_doc.canonical_name is not UNKNOWN:
            wrapped = func_doc.canonical_name
        elif func_doc.parse_repr not in (None, UNKNOWN):
            # [xx] this case should be improved.. when will func_doc
            # have a known parse_repr??
            wrapped = func_doc.parse_repr
        else:
            have_repr = False

        if have_repr:
            deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]), wrapped)
        else:
            deco_repr = UNKNOWN

        # The decorated value no longer has a canonical name.
        func_doc = apply_decorator(deco_name, func_doc)
        func_doc.canonical_name = UNKNOWN
        func_doc.parse_repr = deco_repr

    # Add a variable to the containing namespace.
    set_variable(namespace,
                 VariableDoc(name=func_name, value=func_doc,
                             is_imported=False, is_alias=False,
                             docs_extracted_by='parser'))

    # Return the new ValueDoc.
    return func_doc
1400
def apply_decorator(decorator_name, func_doc):
    """
    Return the value doc that results from applying a decorator to
    C{func_doc}.

    C{staticmethod} and C{classmethod} produce a C{StaticMethodDoc}
    or C{ClassMethodDoc} built from C{func_doc}'s attributes.  For any
    other decorator, L{DEFAULT_DECORATOR_BEHAVIOR} decides:
    C{'transparent'} returns a copy of C{func_doc}, and C{'opaque'}
    returns an empty C{GenericValueDoc}.

    @param decorator_name: The decorator's dotted name (callers in
        this module pass C{None} when it could not be parsed).
    @param func_doc: The doc for the value being decorated.
    @raise ValueError: If L{DEFAULT_DECORATOR_BEHAVIOR} has an
        unsupported value.
    """
    # [xx] what if func_doc is not a RoutineDoc?
    if decorator_name == DottedName('staticmethod'):
        return StaticMethodDoc(**func_doc.__dict__)
    if decorator_name == DottedName('classmethod'):
        return ClassMethodDoc(**func_doc.__dict__)

    # An unrecognized decorator.
    if DEFAULT_DECORATOR_BEHAVIOR == 'transparent':
        same_class = func_doc.__class__
        return same_class(**func_doc.__dict__) # make a copy.
    if DEFAULT_DECORATOR_BEHAVIOR == 'opaque':
        return GenericValueDoc(docs_extracted_by='parser')
    raise ValueError('Bad value for DEFAULT_DECORATOR_BEHAVIOR')
1413
def init_arglist(func_doc, arglist):
    """
    Fill in the signature attributes of C{func_doc} (C{posargs},
    C{posarg_defaults}, C{vararg} and C{kwarg}) from a function
    definition's argument list.

    @param func_doc: The routine doc to initialize.
    @param arglist: The token tree for the argument list; a list
        whose first element is the opening parenthesis.
    @raise ParseError: If C{arglist} is not a well formed argument
        list.
    @return: C{None}
    """
    if not isinstance(arglist, list) or arglist[0] != (token.OP, '('):
        raise ParseError("Bad argument list")

    # Initialize to defaults.
    func_doc.posargs = []
    func_doc.posarg_defaults = []
    func_doc.vararg = None
    func_doc.kwarg = None

    # Divide the arglist (minus its parentheses) into individual args.
    args = split_on(arglist[1:-1], (token.OP, ','))

    # Keyword argument (**kwargs): must come last.
    if args and args[-1][0] == (token.OP, '**'):
        last = args[-1]
        if len(last) != 2 or last[1][0] != token.NAME:
            raise ParseError("Expected name after ** in argument list")
        func_doc.kwarg = last[1][1]
        args.pop()

    # Vararg argument (*args): last of whatever remains.
    if args and args[-1][0] == (token.OP, '*'):
        last = args[-1]
        if len(last) != 2 or last[1][0] != token.NAME:
            raise ParseError("Expected name after * in argument list")
        func_doc.vararg = last[1][1]
        args.pop()

    # Positional arguments.
    for arg in args:
        func_doc.posargs.append(parse_funcdef_arg(arg[0]))

        # No default value.
        if len(arg) == 1:
            func_doc.posarg_defaults.append(None)
            continue

        # Anything longer must be: <arg> = <default...>
        if arg[1] != (token.OP, '=') or len(arg) == 2:
            raise ParseError("Bad argument list")
        default_doc = GenericValueDoc(parse_repr=pp_toktree(arg[2:]),
                                      docs_extracted_by='parser')
        func_doc.posarg_defaults.append(default_doc)
1452 1453 #///////////////////////////////////////////////////////////////// 1454 # Line handler: class declarations 1455 #///////////////////////////////////////////////////////////////// 1456
def process_classdef(line, parent_docs, prev_line_doc, lineno,
                     comments, decorators, encoding):
    """
    The line handler for class declaration lines, such as:

        >>> class Foo(Bar, Baz):

    This handler creates and initializes a new C{VariableDoc}
    containing a C{ClassDoc}, adds the C{VariableDoc} to the
    containing namespace, and returns the C{ClassDoc}.  If the base
    list cannot be resolved, a warning is logged and the class's
    C{bases} are set to C{UNKNOWN}.

    @param line: The token tree for the line; three elements, or four
        when there is a base list, the last one being a colon.
    @raise ParseError: If the line is not a well formed class
        definition.
    @return: The new C{ClassDoc}, or C{None} if the innermost parent
        is not a C{NamespaceDoc}.
    """
    # Check syntax
    if not (3 <= len(line) <= 4) or line[-1] != (token.OP, ':'):
        raise ParseError("Bad class definition line")

    # If we're not in a namespace, then ignore it.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # Get the class's name
    class_name = parse_name(line[1])
    canonical_name = DottedName(namespace.canonical_name, class_name)

    # Create the class's ClassDoc & VariableDoc.
    class_doc = ClassDoc(variables={}, sort_spec=[],
                         bases=[], subclasses=[],
                         canonical_name=canonical_name,
                         defining_module=parent_docs[0],
                         docs_extracted_by='parser')
    var_doc = VariableDoc(name=class_name, value=class_doc,
                          is_imported=False, is_alias=False,
                          docs_extracted_by='parser')

    # Add the bases.
    if len(line) != 4:
        class_doc.bases = []
    else:
        base_toks = line[2]
        if (not isinstance(base_toks, list) or
            base_toks[0] != (token.OP, '(')):
            raise ParseError("Expected base list")
        try:
            for base_name in parse_classdef_bases(base_toks):
                class_doc.bases.append(find_base(base_name, parent_docs))
        except ParseError:
            # The exception is fetched this way so that the block is
            # valid syntax on every Python version.
            import sys
            log.warning("Unable to extract the base list for %s: %s" %
                        (canonical_name, sys.exc_info()[1]))
            class_doc.bases = UNKNOWN

    # Register ourselves as a subclass to our bases.
    if class_doc.bases is not UNKNOWN:
        for basedoc in class_doc.bases:
            if isinstance(basedoc, ClassDoc):
                basedoc.subclasses.append(class_doc)

    # If the preceding comment includes a docstring, then add it.
    add_docstring_from_comments(class_doc, comments)

    # Add the VariableDoc to our container.
    set_variable(namespace, var_doc)

    return class_doc
1518
def find_base(name, parent_docs):
    """
    Find the value doc for the base class with the given name.

    @param name: The name of the base class, as written in the class
        definition.
    @type name: L{DottedName}
    @param parent_docs: The stack of enclosing docs.
    @return: The value of the variable that C{name} refers to; or, if
        that variable is imported and has no value, the docs obtained
        by parsing it (when L{BASE_HANDLING} is C{'parse'}) or a proxy
        C{ClassDoc} for it.
    @raise ParseError: If no variable can be found for C{name}, or if
        the variable has neither a value nor an import source.
    """
    assert isinstance(name, DottedName)

    # Find the variable containing the base.
    base_var = lookup_variable(name, parent_docs)
    if base_var is None:
        # If it looks like it's in an external module, then try
        # "importing" it.  The first identifier may itself be
        # unknown, in which case lookup_name() returns None.
        head_var = lookup_name(name[0], parent_docs)
        if (head_var is not None and
            head_var.imported_from not in (None, UNKNOWN)):
            _import_var(name, parent_docs)
            base_var = lookup_variable(name, parent_docs)
        # If we still don't have a var containing the base, give up.
        if base_var is None:
            raise ParseError("Could not find %s" % name)

    # If the variable has a value, return that value.
    if base_var.value != UNKNOWN:
        return base_var.value

    # Otherwise, if BASE_HANDLING is 'parse', try parsing the docs for
    # the base class; if that fails, or if BASE_HANDLING is 'link',
    # just make a proxy object.
    if base_var.imported_from not in (None, UNKNOWN):
        if BASE_HANDLING == 'parse':
            try:
                return parse_docs(name=base_var.imported_from)
            except ParseError:
                pass
        # Either BASE_HANDLING='link' or parsing the base class failed;
        # return a proxy value for the base class.
        return ClassDoc(variables={}, sort_spec=[], bases=[],
                        subclasses=[], proxy_for=base_var.imported_from,
                        docs_extracted_by='parser')
    else:
        # No value available for the variable.
        raise ParseError("Could not find a value for %s" % name)
1555 1556 1557 1558 1559 #///////////////////////////////////////////////////////////////// 1560 #{ Parsing 1561 #///////////////////////////////////////////////////////////////// 1562
def dotted_names_in(elt_list):
    """
    Return a list of all simple dotted names in the given list of
    expressions.  Bracketed groups are searched recursively;
    expressions that are not dotted names are ignored.

    @param elt_list: A list of token lists.  This list is used as
        the work stack, so it is emptied by the call.
    @return: The names found, as returned by L{parse_dotted_name}.
        Elements are taken from the end of the list first.
    """
    found = []
    while elt_list:
        candidate = elt_list.pop()
        is_group = len(candidate) == 1 and isinstance(candidate[0], list)
        if is_group:
            # Nested list: strip its brackets and queue the contents.
            inner = candidate[0][1:-1]
            elt_list.extend(split_on(inner, (token.OP, ',')))
            continue
        try:
            found.append(parse_dotted_name(candidate))
        except ParseError:
            pass # complex expression -- ignore
    return found
1580
def parse_name(elt, strip_parens=False):
    """
    If the given token tree element is a name token, then return
    that name as a string.  Otherwise, raise ParseError.

    @param elt: A token (a C{(type, text)} pair) or a nested list.
    @param strip_parens: If true, then any parentheses around a
        single enclosed element are removed first, so that C{((x))}
        is treated like C{x}.
    @raise ParseError: If C{elt} is not a name token.
    """
    if strip_parens:
        open_paren = (token.OP, '(')
        close_paren = (token.OP, ')')
        while isinstance(elt, list):
            wraps_one = (len(elt) == 3 and elt[0] == open_paren and
                         elt[-1] == close_paren)
            if not wraps_one:
                break
            elt = elt[1]

    if isinstance(elt, list):
        raise ParseError("Bad name")
    if elt[0] != token.NAME:
        raise ParseError("Bad name")
    return elt[1]
1596
def parse_dotted_name(elt_list, strip_parens=True):
    """
    Parse a list of token tree elements of the form
    C{name . name . name} into a L{DottedName}.  Parentheses around
    the leading part, as in C{((x.y).z)}, and around individual
    names are accepted.

    @param elt_list: The token tree elements to parse.  The list is
        not modified.
    @param strip_parens: Accepted for interface compatibility, but
        not currently consulted: parentheses are always stripped.
    @raise ParseError: If C{elt_list} is empty or is not a dotted
        name.
    @bug: does not handle 'x.(y).z'
    """
    if len(elt_list) == 0: raise ParseError("Bad dotted name")

    # Work on a copy: the parenthesis unwrapping below edits the list
    # in place, and callers go on using their token tree afterwards
    # (e.g. to pretty-print it when this parse fails).
    elt_list = list(elt_list)

    # Handle ((x.y).z).  (If the contents of the parens include
    # anything other than dotted names, such as (x,y), then we'll
    # catch it below and raise a ParseError.
    while (isinstance(elt_list[0], list) and
           len(elt_list[0]) >= 3 and
           elt_list[0][0] == (token.OP, '(') and
           elt_list[0][-1] == (token.OP, ')')):
        elt_list[:1] = elt_list[0][1:-1]

    # A dotted name alternates names and dots, so its length is odd.
    if len(elt_list) % 2 != 1: raise ParseError("Bad dotted name")
    name = DottedName(parse_name(elt_list[0], True))
    for i in range(2, len(elt_list), 2):
        dot, identifier = elt_list[i-1], elt_list[i]
        if dot != (token.OP, '.'):
            raise ParseError("Bad dotted name")
        name = DottedName(name, parse_name(identifier, True))
    return name
1620
def split_on(elt_list, split_tok):
    """
    Split a list of token tree elements into sub-lists at every
    element equal to C{split_tok}.  The separators themselves are
    dropped.  A trailing separator is allowed.

    @param elt_list: The elements to split.
    @param split_tok: The separator element.
    @return: A list of non-empty lists.
    @raise ParseError: If a separator is not preceded by at least one
        element (a leading separator, or two in a row).
    """
    # [xx] add code to guarantee each elt is non-empty.
    groups = []
    current = []
    for elt in elt_list:
        if elt != split_tok:
            current.append(elt)
            continue
        if current == []:
            raise ParseError("Empty element from split")
        groups.append(current)
        current = []
    # Anything after the last separator forms the final group.
    if current != []:
        groups.append(current)
    return groups
1632
def parse_funcdef_arg(elt):
    """
    If the given tree token element contains a valid function
    definition argument (i.e., an identifier token or nested list of
    identifiers), then return a corresponding string identifier or
    nested list of string identifiers.  Otherwise, raise a
    ParseError.

    @param elt: A name token, or a parenthesized list of arguments.
    @raise ParseError: If C{elt} is neither a name token nor a
        parenthesized list.
    """
    # A plain token: it must be a name.
    if not isinstance(elt, list):
        if elt[0] == token.NAME:
            return elt[1]
        raise ParseError("Bad argument -- expected name or tuple")

    # A nested list: it must be parenthesized.
    if elt[0] != (token.OP, '('):
        raise ParseError("Bad argument -- expected name or tuple")

    # Parentheses around a single element are just grouping.
    if len(elt) == 3:
        return parse_funcdef_arg(elt[1])

    # Otherwise it is a tuple argument; parse each member.
    members = []
    for member in elt[1:-1]:
        if member != (token.OP, ','):
            members.append(parse_funcdef_arg(member))
    return members
1655
def parse_classdef_bases(elt):
    """
    If the given tree token element contains a valid base list (that
    contains only dotted names), then return a corresponding list of
    L{DottedName}s.  Otherwise, raise a ParseError.

    @param elt: The token tree for the parenthesized base list.
    @raise ParseError: If C{elt} is not a parenthesized list, or if
        one of the bases is not a dotted name.

    @bug: Does not handle either of::
        - class A( (base.in.parens) ): pass
        - class B( (lambda:calculated.base)() ): pass
    """
    if not isinstance(elt, list):
        raise ParseError("Bad base list")
    if elt[0] != (token.OP, '('):
        raise ParseError("Bad base list")

    # Drop the parentheses, then parse each comma-separated base.
    bases = []
    for base_toks in split_on(elt[1:-1], (token.OP, ',')):
        bases.append(parse_dotted_name(base_toks))
    return bases
1672 1673 # Used by: base list; 'del'; ...
def parse_dotted_name_list(elt_list):
    """
    If the given list of tree token elements contains a
    comma-separated list of dotted names, then return a corresponding
    list of L{DottedName} objects.  Otherwise, raise ParseError.

    An empty list and a trailing comma are both accepted.

    @param elt_list: A flat list of name, C{'.'} and C{','} tokens.
    @raise ParseError: If the elements do not form a list of dotted
        names.
    """
    # What the next element is expected to be:
    NEW_NAME = 0      # a name, or end of list
    SEPARATOR = 1     # a comma, a period, or end of list
    NAME_PART = 2     # the continuation of a dotted name

    names = []
    expecting = NEW_NAME
    for elt in elt_list:
        if expecting == SEPARATOR:
            if elt == (token.OP, '.'):
                expecting = NAME_PART
            elif elt == (token.OP, ','):
                expecting = NEW_NAME
            else:
                raise ParseError("Expected '.' or ',' or end of list")
            continue

        # In both remaining states the element has to be a name.
        if not (isinstance(elt, tuple) and elt[0] == token.NAME):
            raise ParseError("Expected a name")
        if expecting == NEW_NAME:
            names.append(DottedName(elt[1]))
        else:
            names[-1] = DottedName(names[-1], elt[1])
        expecting = SEPARATOR

    # The list must not end right after a period.
    if expecting == NAME_PART:
        raise ParseError("Expected a name")
    return names
1711
def parse_string(elt_list):
    """
    If C{elt_list} consists of a single string token, then return the
    string value that it denotes.  Otherwise, raise ParseError.

    @param elt_list: A list of token tree elements.
    @raise ParseError: If C{elt_list} is not exactly one string
        token.
    """
    if len(elt_list) != 1 or elt_list[0][0] != token.STRING:
        raise ParseError("Expected a string")

    literal = elt_list[0][1]
    # [xx] use something safer here?  But it needs to deal with
    # any string type (eg r"foo\bar" etc).
    # NOTE(review): eval() is only applied to the text of a token
    # whose type is STRING -- presumably a string literal produced by
    # the tokenizer; confirm that no caller passes other text.
    return eval(literal)
1719 1720 # ['1', 'b', 'c']
def parse_string_list(elt_list):
    """
    Parse a comma-separated list of string literals, optionally
    enclosed in parentheses or square brackets (e.g.
    C{['1', 'b', 'c']}), and return the corresponding list of
    strings.

    @param elt_list: A list of token tree elements: either the
        comma-separated string tokens themselves, or a single nested
        list holding them together with their brackets.
    @raise ParseError: If an element of the list is not a single
        string token.
    """
    # If the whole thing is one bracketed group, strip the brackets.
    # It is the *element* that must be a nested list here: a single
    # bare token is a (type, text) pair, whose first item is an int.
    if (len(elt_list) == 1 and isinstance(elt_list[0], list) and
        elt_list[0][0][1] in ('(', '[')):
        elt_list = elt_list[0][1:-1]

    string_list = []
    for string_elt in split_on(elt_list, (token.OP, ',')):
        string_list.append(parse_string(string_elt))

    return string_list
1731 1732 #///////////////////////////////////////////////////////////////// 1733 #{ Variable Manipulation 1734 #///////////////////////////////////////////////////////////////// 1735
def set_variable(namespace, var_doc, preserve_docstring=False):
    """
    Add C{var_doc} to C{namespace}, and record C{namespace} as its
    container.  If the namespace already contains a variable with
    the same name, then the old variable is discarded.

    @param namespace: The doc to add the variable to.  Nothing is
        done if this is not a C{NamespaceDoc}.
    @param var_doc: The variable to add; its C{container} must still
        be C{UNKNOWN}.
    @param preserve_docstring: If true, and C{var_doc} has no
        docstring, then keep the docstring (and docstring line
        number) of the variable that is being replaced.
    @return: C{None}
    """
    if not isinstance(namespace, NamespaceDoc):
        return

    var_name = var_doc.name

    # If we already have a variable with this name, then remove the
    # old VariableDoc from the sort_spec list; and if we gave its
    # value a canonical name, then delete it.
    if var_name in namespace.variables:
        namespace.sort_spec.remove(var_name)
        replaced = namespace.variables[var_name]
        if replaced.is_alias == False and replaced.value != UNKNOWN:
            replaced.value.canonical_name = UNKNOWN

        inherit_docstring = (preserve_docstring and
                             var_doc.docstring in (None, UNKNOWN) and
                             replaced.docstring not in (None, UNKNOWN))
        if inherit_docstring:
            var_doc.docstring = replaced.docstring
            var_doc.docstring_lineno = replaced.docstring_lineno

    # Add the variable to the namespace, at the end of the ordering.
    namespace.variables[var_name] = var_doc
    namespace.sort_spec.append(var_name)
    assert var_doc.container is UNKNOWN
    var_doc.container = namespace
1764
def del_variable(namespace, name):
    """
    Delete the variable identified by C{name} from C{namespace}.
    If C{name} has more than one piece, then each leading piece is
    looked up in turn and the search continues in that variable's
    value.  Nothing happens if a piece cannot be found, or if the
    object reached at any step is not a L{NamespaceDoc}.

    @param namespace: The namespace in which the search starts.
    @param name: A non-empty sequence of identifiers that supports
        indexing and slicing (presumably a L{DottedName} -- confirm
        against the callers).
    """
    while isinstance(namespace, NamespaceDoc):
        head = name[0]
        if head not in namespace.variables:
            return

        if len(name) != 1:
            # Descend into the value of the leading piece.
            namespace = namespace.variables[head].value
            name = name[1:]
            continue

        # Last piece: remove the variable itself.
        var_doc = namespace.variables[head]
        namespace.sort_spec.remove(head)
        del namespace.variables[head]
        # The removed variable's value loses its canonical name,
        # unless the variable was only an alias for it.
        if not var_doc.is_alias and var_doc.value is not UNKNOWN:
            var_doc.value.canonical_name = UNKNOWN
        return
1778 1779 #///////////////////////////////////////////////////////////////// 1780 #{ Name Lookup 1781 #///////////////////////////////////////////////////////////////// 1782
def lookup_name(identifier, parent_docs):
    """
    Find and return the documentation for the variable named by
    the given identifier.

    Three namespaces are checked, in order: locals (the last element
    of C{parent_docs}), globals (the first element of C{parent_docs},
    i.e. the containing module), and builtins.  This is true even in
    versions of python with nested scopes, because nested scope
    lookup does not apply to nested class definitions, and variables
    in nested functions are not of interest here.

    @param identifier: The name to look up (a single identifier).
    @param parent_docs: A non-empty sequence of the documentation
        objects that contain the point where the name is used.
    @rtype: L{VariableDoc} or C{None}
    @return: The first matching variable, or C{None} if the name is
        not found in any of the three namespaces.
    @raise TypeError: If C{identifier} is not a string.
    """
    if not isinstance(identifier, basestring):
        raise TypeError('identifier must be a string')

    # Locals, then globals (aka the containing module).
    for scope in (parent_docs[-1], parent_docs[0]):
        if (isinstance(scope, NamespaceDoc) and
            identifier in scope.variables):
            return scope.variables[identifier]

    # Builtins
    builtins = epydoc.docintrospecter.introspect_docs(__builtin__)
    if (isinstance(builtins, NamespaceDoc) and
        identifier in builtins.variables):
        return builtins.variables[identifier]

    # We didn't find it; return None.
    return None
1816
def lookup_variable(dotted_name, parent_docs):
    """
    Find and return the documentation for the variable with the
    given (possibly dotted) name.

    @param dotted_name: The name of the variable.
    @type dotted_name: L{DottedName}
    @param parent_docs: The sequence of containing documentation
        objects, as accepted by L{lookup_name}.
    @return: The variable's documentation, or C{None} if it can not
        be found.
    """
    assert isinstance(dotted_name, DottedName)

    if len(dotted_name) != 1:
        # Resolve everything but the last piece to a value; the
        # variable must then live directly inside that value.
        container = lookup_value(dotted_name[:-1], parent_docs)
        if not isinstance(container, NamespaceDoc):
            return None # var not found.
        leaf = dotted_name[-1]
        if leaf not in container.variables:
            return None # var not found.
        return container.variables[leaf]

    # A simple identifier is resolved directly.
    return lookup_name(dotted_name[0], parent_docs)
1833
def lookup_value(dotted_name, parent_docs):
    """
    Find and return the documentation for the value contained in
    the variable with the given name in the current namespace.

    @param dotted_name: The name of the variable whose value is
        wanted.
    @type dotted_name: L{DottedName}
    @param parent_docs: The sequence of containing documentation
        objects, as accepted by L{lookup_name}.
    @return: The value of the named variable; or a
        L{GenericValueDoc} proxy if the lookup runs into a variable
        whose value is unknown but which records where it was
        imported from; or C{None} if the name can not be resolved.
    """
    assert isinstance(dotted_name, DottedName)

    current = lookup_name(dotted_name[0], parent_docs)

    for index in range(1, len(dotted_name)):
        if current is None:
            return None

        value = current.value
        if not isinstance(value, NamespaceDoc):
            if (value == UNKNOWN and
                current.imported_from not in (None, UNKNOWN)):
                # The remaining pieces are taken relative to the
                # place the variable was imported from.
                src_name = current.imported_from + dotted_name[index:]
                # [xx] do I want to create a proxy here??
                return GenericValueDoc(proxy_for=src_name,
                                       parse_repr=str(dotted_name),
                                       docs_extracted_by='parser')
            return None

        # Step into the namespace and pick up the next piece.
        current = value.variables.get(dotted_name[index])

    if current is None:
        return None
    return current.value
1861 1862 #///////////////////////////////////////////////////////////////// 1863 #{ Docstring Comments 1864 #///////////////////////////////////////////////////////////////// 1865
def add_docstring_from_comments(api_doc, comments):
    """
    Use a block of comment lines as the docstring of C{api_doc}.
    Nothing happens if C{api_doc} is C{None} or if C{comments} is
    empty.

    @param api_doc: The object whose C{docstring} and
        C{docstring_lineno} attributes should be set.
    @param comments: A sequence of C{(line, lineno)} pairs.  The
        lines are joined with newlines to form the docstring, and
        the line number of the first pair is recorded as its
        location.
    """
    if api_doc is None:
        return
    if not comments:
        return

    text_lines = []
    for (text, lineno) in comments:
        text_lines.append(text)

    api_doc.docstring = '\n'.join(text_lines)
    api_doc.docstring_lineno = comments[0][1]
1870 1871 #///////////////////////////////////////////////////////////////// 1872 #{ Tree tokens 1873 #///////////////////////////////////////////////////////////////// 1874
def pp_toktree(elts, spacing='normal', indent=0):
    """
    Return a string rendering of the token tree C{elts}.

    @param elts: A sequence whose elements are either tokens (tuples
        whose first item is a token type and whose second item is
        the token's text) or nested sequences of such elements,
        which are rendered recursively.
    @param spacing: If C{'tight'}, then no space is inserted between
        a rendered subtree and the text before it when that text
        ends with one of the characters in C{'+-*/=,'}.  No other
        value is tested for.
    @param indent: The current indentation depth; it is incremented
        and decremented by INDENT and DEDENT tokens, and is passed
        down to nested subtrees.
    @return: The rendered text.
    """
    # NOTE(review): the DEDENT branch below compares a four-character
    # slice against its string literal, which suggests that runs of
    # spaces inside the string literals of this function were
    # collapsed in this copy of the file -- verify the literals
    # against the upstream source before relying on them.
    s = u''
    for elt in elts:
        # Put a blank line before class & def statements.
        if elt == (token.NAME, 'class') or elt == (token.NAME, 'def'):
            s += '\n%s' % (' '*indent)

        if isinstance(elt, tuple):
            # A single token: dispatch on its token type.
            if elt[0] == token.NEWLINE:
                s += ' '+elt[1]
                s += '\n%s' % (' '*indent)
            elif elt[0] == token.INDENT:
                s += ' '
                indent += 1
            elif elt[0] == token.DEDENT:
                # Strip the trailing indentation that was emitted for
                # the level that is now being closed.
                assert s[-4:] == ' '
                s = s[:-4]
                indent -= 1
            elif elt[0] == tokenize.COMMENT:
                s += elt[1].rstrip() + '\n' + ' '*indent
            else:
                # Any other token is appended verbatim, with no
                # separating space.
                s += elt[1]
        else:
            # A nested subtree: render it, then decide whether a
            # space is needed between it and the text so far.
            elt_s = pp_toktree(elt, spacing, indent)
            # Join them.  s = left side; elt_s = right side.
            if (elt_s=='' or s=='' or
                s in ('-','`') or elt_s in ('}',']',')','`',':') or
                elt_s[0] in ('.',',') or s[-1] in ('(','[','{','.','\n',' ') or
                (elt_s[0] == '(' and s[-1] not in (',','='))):
                s = '%s%s' % (s, elt_s)
            # NOTE(review): 'and' binds more tightly than 'or', so
            # this condition reads as
            #   (spacing=='tight' and s[-1] in ops) or elt_s[0] in ops
            # i.e. the test on elt_s[0] applies for every spacing
            # mode.  Confirm that this is the intended grouping.
            elif (spacing=='tight' and
                  s[-1] in '+-*/=,' or elt_s[0] in '+-*/=,'):
                s = '%s%s' % (s, elt_s)
            else:
                s = '%s %s' % (s, elt_s)
    return s
1911 1912 #///////////////////////////////////////////////////////////////// 1913 #{ Helper Functions 1914 #///////////////////////////////////////////////////////////////// 1915
def get_module_encoding(filename):
    """
    Determine the source encoding of a Python source file from its
    first two lines.

    If the file starts with a UTF-8 byte order mark, then the
    encoding is C{'utf-8'}.  Otherwise, the first two lines are
    searched for an encoding declaration.  As required by PEP 263,
    the declaration is only recognized inside a comment that is
    alone on its line, so text such as C{"coding: x"} that occurs in
    code or in a string literal is not mistaken for one.

    @param filename: The path of the source file to inspect.
    @return: The name of the declared encoding, or C{'iso-8859-1'}
        if the file does not declare one.
    @rtype: C{string}
    @see: U{PEP 263<http://www.python.org/peps/pep-0263.html>}
    """
    try:
        module_file = open(filename, 'rU')
    except ValueError:
        # Interpreters that reject the 'U' mode flag already apply
        # universal newline translation in plain text mode.
        module_file = open(filename, 'r')

    # Only the first two lines matter; release the file right away.
    try:
        lines = [module_file.readline() for i in range(2)]
    finally:
        module_file.close()

    if lines[0].startswith('\xef\xbb\xbf'):
        return 'utf-8'

    for line in lines:
        m = re.search(r"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)",
                      line)
        if m: return m.group(1)

    # Nothing was declared; fall back on a default encoding.
    return 'iso-8859-1' # aka 'latin-1'
1934
def _get_module_name(filename, package_doc):
    """
    Derive the dotted name of the module stored in C{filename}.

    The module's own name is the file's base name with any C{.py}
    extension (optionally followed by one more character, as in
    C{.pyc} or C{.pyw}) removed; for an C{__init__} file, it is the
    name of the directory that contains the file.

    @param filename: The path of the module's source file.
    @param package_doc: The documentation of the package that
        contains the module, or C{None} if there is none.
    @return: A tuple C{(dotted_name, is_package)}, where
        C{is_package} is true if the file is an C{__init__} file.
    """
    # The dot is escaped so that only a real extension is stripped;
    # an unescaped dot would also truncate extension-less script
    # names that merely end in "py" (e.g. "happy" or "numpy").
    name = re.sub(r'\.py\w?$', '', os.path.split(filename)[1])
    if name == '__init__':
        is_package = True
        name = os.path.split(os.path.split(filename)[0])[1]
    else:
        is_package = False

    # [XX] if the module contains a script, then `name` may not
    # necessarily be a valid identifier -- which will cause
    # DottedName to raise an exception.  Is that what I want?
    if package_doc is None:
        dotted_name = DottedName(name)
    else:
        dotted_name = DottedName(package_doc.canonical_name, name)

    # Check if the module looks like it's shadowed by a variable.
    # If so, then add a "'" to the end of its canonical name, to
    # distinguish it from the variable.
    if package_doc is not None and name in package_doc.variables:
        valdoc = package_doc.variables[name].value
        if (valdoc not in (None, UNKNOWN) and
            valdoc.imported_from != dotted_name):
            log.warning("Module %s might be shadowed by a variable with "
                        "the same name." % dotted_name)
            dotted_name = DottedName(str(dotted_name)+"'")

    return dotted_name, is_package
1966
def flatten(lst, out=None):
    """
    Collect the leaves of a nested list, in left-to-right order.

    @param lst: The nested list that should be flattened.  Any
        element that is a list or a tuple is expanded in place; all
        other elements are treated as leaves.
    @param out: An optional list to which the leaves are appended.
        If it is omitted, then a new list is created.
    @return: The list that received the leaves (C{out}, if it was
        given).
    """
    if out is None:
        out = []

    # Walk the nesting with an explicit stack of iterators: the top
    # of the stack is the sequence currently being expanded.
    pending = [iter(lst)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, (list, tuple)):
                # Expand the nested sequence before continuing with
                # the rest of the current one.
                pending.append(iter(item))
                break
            out.append(item)
        else:
            # The current sequence is exhausted.
            pending.pop()
    return out
1980