Package epydoc :: Package markup :: Module epytext
[hide private]
[frames] | [no frames]

Source Code for Module epydoc.markup.epytext

   1  # 
   2  # epytext.py: epydoc formatted docstring parsing 
   3  # Edward Loper 
   4  # 
   5  # Created [04/10/01 12:00 AM] 
   6  # $Id: epytext.py 1210 2006-04-10 13:25:50Z edloper $ 
   7  # 
   8   
   9  """ 
  10  Parser for epytext strings.  Epytext is a lightweight markup whose 
  11  primary intended application is Python documentation strings.  This 
  12  parser converts Epytext strings to a XML/DOM representation.  Epytext 
  13  strings can contain the following X{structural blocks}: 
  14   
  15      - X{epytext}: The top-level element of the DOM tree. 
  16      - X{para}: A paragraph of text.  Paragraphs contain no newlines,  
  17        and all spaces are soft. 
  18      - X{section}: A section or subsection. 
  19      - X{field}: A tagged field.  These fields provide information 
  20        about specific aspects of a Python object, such as the 
  21        description of a function's parameter, or the author of a 
  22        module. 
  23      - X{literalblock}: A block of literal text.  This text should be 
  24        displayed as it would be displayed in plaintext.  The 
  25        parser removes the appropriate amount of leading whitespace  
  26        from each line in the literal block. 
  27      - X{doctestblock}: A block containing sample python code, 
  28        formatted according to the specifications of the C{doctest} 
  29        module. 
  30      - X{ulist}: An unordered list. 
  31      - X{olist}: An ordered list. 
  32      - X{li}: A list item.  This tag is used both for unordered list 
  33        items and for ordered list items. 
  34   
  35  Additionally, the following X{inline regions} may be used within 
  36  C{para} blocks: 
  37       
  38      - X{code}:   Source code and identifiers. 
  39      - X{math}:   Mathematical expressions. 
  40      - X{index}:  A term which should be included in an index, if one 
  41                   is generated. 
  42      - X{italic}: Italicized text. 
  43      - X{bold}:   Bold-faced text. 
  44      - X{uri}:    A Universal Resource Indicator (URI) or Universal 
  45                   Resource Locator (URL) 
  46      - X{link}:   A Python identifier which should be hyperlinked to 
  47                   the named object's documentation, when possible. 
  48   
   49  The returned DOM tree will conform to the following Document Type 
  50  Description:: 
  51   
  52     <!ENTITY % colorized '(code | math | index | italic | 
  53                            bold | uri | link | symbol)*'> 
  54   
  55     <!ELEMENT epytext ((para | literalblock | doctestblock | 
  56                        section | ulist | olist)*, fieldlist?)> 
  57   
  58     <!ELEMENT para (#PCDATA | %colorized;)*> 
  59   
   60     <!ELEMENT section (para | literalblock | doctestblock | 
  61                        section | ulist | olist)+> 
  62   
  63     <!ELEMENT fieldlist (field+)> 
   64     <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock | 
   65                                   ulist | olist)+)> 
  66     <!ELEMENT tag (#PCDATA)> 
  67     <!ELEMENT arg (#PCDATA)> 
  68      
  69     <!ELEMENT literalblock (#PCDATA)> 
  70     <!ELEMENT doctestblock (#PCDATA)> 
  71   
  72     <!ELEMENT ulist (li+)> 
  73     <!ELEMENT olist (li+)> 
  74     <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> 
  75     <!ATTLIST li bullet NMTOKEN #IMPLIED> 
  76     <!ATTLIST olist start NMTOKEN #IMPLIED> 
  77   
  78     <!ELEMENT uri     (name, target)> 
  79     <!ELEMENT link    (name, target)> 
  80     <!ELEMENT name    (#PCDATA | %colorized;)*> 
  81     <!ELEMENT target  (#PCDATA)> 
  82      
  83     <!ELEMENT code    (#PCDATA | %colorized;)*> 
  84     <!ELEMENT math    (#PCDATA | %colorized;)*> 
  85     <!ELEMENT italic  (#PCDATA | %colorized;)*> 
  86     <!ELEMENT bold    (#PCDATA | %colorized;)*> 
  87     <!ELEMENT indexed (#PCDATA | %colorized;)> 
  88   
  89     <!ELEMENT symbol (#PCDATA)> 
  90   
  91  @var SYMBOLS: A list of the of escape symbols that are supported 
  92        by epydoc.  Currently the following symbols are supported: 
  93  <<<SYMBOLS>>> 
  94  """ 
  95  # Note: the symbol list is appended to the docstring automatically, 
  96  # below. 
  97   
  98  __docformat__ = 'epytext en' 
  99   
 100  # Code organization.. 
 101  #   1. parse() 
 102  #   2. tokenize() 
 103  #   3. colorize() 
 104  #   4. helpers 
 105  #   5. testing 
 106   
 107  import re, string, types, sys, os.path 
 108  from xml.dom.minidom import Document, Text 
 109  import xml.dom.minidom 
 110  from epydoc.markup import * 
 111  from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex 
 112  from epydoc.docwriter.html_colorize import colorize_doctestblock 
 113   
 114  ################################################## 
 115  ## Constants 
 116  ################################################## 
 117   
 118  # The possible heading underline characters, listed in order of 
 119  # heading depth.  
 120  _HEADING_CHARS = "=-~" 
 121   
 122  # Escape codes.  These should be needed very rarely. 
 123  _ESCAPES = {'lb':'{', 'rb': '}'} 
 124   
 125  # Symbols.  These can be generated via S{...} escapes. 
 126  SYMBOLS = [ 
 127      # Arrows 
 128      '<-', '->', '^', 'v',  
 129   
 130      # Greek letters 
 131      'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',   
 132      'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',   
 133      'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',   
 134      'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega', 
 135      'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',   
 136      'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',   
 137      'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',   
 138      'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega', 
 139       
 140      # HTML character entities 
 141      'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr', 
 142      'lArr', 'rArr', 'uArr', 'dArr', 'hArr',  
 143      'copy', 'times', 'forall', 'exist', 'part', 
 144      'empty', 'isin', 'notin', 'ni', 'prod', 'sum', 
 145      'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup', 
 146      'int', 'there4', 'sim', 'cong', 'asymp', 'ne', 
 147      'equiv', 'le', 'ge', 'sub', 'sup', 'nsub', 
 148      'sube', 'supe', 'oplus', 'otimes', 'perp', 
 149   
 150      # Alternate (long) names 
 151      'infinity', 'integral', 'product', 
 152      '>=', '<=',  
 153      ] 
 154  # Convert to a dictionary, for quick lookup 
 155  _SYMBOLS = {} 
 156  for symbol in SYMBOLS: _SYMBOLS[symbol] = 1 
 157   
 158  # Add symbols to the docstring. 
 159  symblist = '      ' 
 160  symblist += ';\n      '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol) 
 161                                for symbol in SYMBOLS]) 
 162  __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist) 
 163  del symbol, symblist 
 164   
 165  # Tags for colorizing text. 
 166  _COLORIZING_TAGS = { 
 167      'C': 'code', 
 168      'M': 'math', 
 169      'X': 'indexed', 
 170      'I': 'italic',  
 171      'B': 'bold', 
 172      'U': 'uri', 
 173      'L': 'link',       # A Python identifier that should be linked to  
 174      'E': 'escape',     # escapes characters or creates symbols 
 175      'S': 'symbol', 
 176      'G': 'graph', 
 177      } 
 178   
 179  # Which tags can use "link syntax" (e.g., U{Python<www.python.org>})? 
 180  _LINK_COLORIZING_TAGS = ['link', 'uri'] 
 181   
 182  ################################################## 
 183  ## Structuring (Top Level) 
 184  ################################################## 
 185   
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string,
        or C{None} if a fatal error occurred and C{errors} was given.
    @rtype: L{xml.dom.minidom.Document}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.  (Identity test: "errors == None" would
    # invoke __eq__ on an arbitrary caller-supplied object.)
    if errors is None:
        errors = []
        raise_on_error = 1
    else:
        raise_on_error = 0

    # Preprocess the string: normalize CRLF line endings to LF, and
    # expand tabs (indentation is significant in epytext).  Use the
    # string method rather than the deprecated string-module function.
    str = re.sub('\015\012', '\012', str)
    str = str.expandtabs()

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?
    encountered_field = 0

    # Create an XML document to hold the epytext.
    doc = Document()

    # Maintain two parallel stacks: one contains DOM elements, and
    # gives the ancestors of the current block.  The other contains
    # indentation values, and gives the indentation of the
    # corresponding DOM elements.  An indentation of "None" reflects
    # an unknown indentation.  However, the indentation must be
    # greater than, or greater than or equal to, the indentation of
    # the prior element (depending on what type of DOM element it
    # corresponds to).  No 2 consecutive indent_stack values will
    # ever be "None."  Use initial dummy elements in the stack, so we
    # don't have to worry about bounds checking.
    stack = [None, doc.createElement('epytext')]
    indent_stack = [-1, None]

    for token in tokens:
        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        # Add the token to the tree, according to its type.
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.LBLOCK:
            stack[-1].appendChild(token.to_dom(doc))
        elif token.tag == Token.DTBLOCK:
            stack[-1].appendChild(token.to_dom(doc))
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            assert 0, 'Unknown token type: '+token.tag

        # Check if the DOM element we just added was a field; fields
        # must be the last thing in the docstring.
        if stack[-1].tagName == 'field':
            encountered_field = 1
        elif encountered_field == 1:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "+
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # If there was a fatal error, then signal it!
    if len([e for e in errors if e.is_fatal()]) > 0:
        if raise_on_error:
            raise errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    doc.appendChild(stack[1])
    return doc
287
def _pop_completed_blocks(token, stack, indent_stack):
    """
    Pop any completed blocks off the stack.  This includes any
    blocks that we have dedented past, as well as any list item
    blocks that we've dedented to.  The top element on the stack
    should only be a list if we're about to start a new list
    item (i.e., if the next token is a bullet).
    """
    indent = token.indent
    # Identity tests for None (the original used ==/!=, which would
    # call __eq__ needlessly; indents are ints or None).
    if indent is not None:
        while (len(stack) > 2):
            pop = 0

            # Dedent past a block.
            if indent_stack[-1] is not None and indent < indent_stack[-1]:
                pop = 1
            elif indent_stack[-1] is None and indent < indent_stack[-2]:
                pop = 1

            # Dedent to a list item, if it is followed by another list
            # item with the same indentation.
            elif (token.tag == 'bullet' and indent == indent_stack[-2] and
                  stack[-1].tagName in ('li', 'field')):
                pop = 1

            # End of a list (no more list items available)
            elif (stack[-1].tagName in ('ulist', 'olist') and
                  (token.tag != 'bullet' or token.contents[-1] == ':')):
                pop = 1

            # Pop the block, if it's complete.  Otherwise, we're done.
            if pop == 0: return
            stack.pop()
            indent_stack.pop()
319
def _add_para(doc, para_token, stack, indent_stack, errors):
    """
    Colorize the given paragraph token and append the resulting DOM
    element to the innermost open block (C{stack[-1]}).

    @param doc: The DOM document, used as an element factory.
    @param para_token: The C{Token.PARA} token to add.
    @param stack: The stack of open DOM blocks.
    @param indent_stack: Indentations of the elements of C{stack}.
    @param errors: A list to which L{StructuringError}s are appended.
    """
    # An unknown parent indentation is resolved by the first
    # paragraph inside it.  (Identity test for None, not "==".)
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent
    if para_token.indent == indent_stack[-1]:
        # Colorize the paragraph and add it.
        para = _colorize(doc, para_token, errors)
        stack[-1].appendChild(para)
    else:
        estr = "Improper paragraph indentation."
        errors.append(StructuringError(estr, para_token.startline))
333
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """Add a new section to the DOM tree, with the given heading."""
    # An unknown parent indentation is resolved by the heading.
    # (Identity test for None, not "==".)
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        estr = "Improper heading indentation."
        errors.append(StructuringError(estr, heading_token.startline))

    # Check for errors: headings may only be nested directly inside
    # other sections (or at the top level).
    for tok in stack[2:]:
        if tok.tagName != "section":
            estr = "Headings must occur at the top level."
            errors.append(StructuringError(estr, heading_token.startline))
            break
    if (heading_token.level+2) > len(stack):
        estr = "Wrong underline character for heading."
        errors.append(StructuringError(estr, heading_token.startline))

    # Pop the appropriate number of sections, so that the new
    # section is nested at the correct level.
    stack[heading_token.level+2:] = []
    indent_stack[heading_token.level+2:] = []

    # Colorize the heading.
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.
    sec = doc.createElement("section")
    stack[-1].appendChild(sec)
    stack.append(sec)
    sec.appendChild(head)
    indent_stack.append(None)
366
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag.  When necessary, create the associated
    list.
    """
    # Determine what type of bullet it is.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = 0
    if stack[-1].tagName != list_type:
        newlist = 1
    elif list_type == 'olist' and stack[-1].tagName == 'olist':
        # A non-consecutive item number starts a new ordered list.
        old_listitem = stack[-1].childNodes[-1]
        old_bullet = old_listitem.getAttribute("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = 1

    # Create the new list.
    if newlist:
        # Bug fix: the original tested "stack[-1].tagName is 'fieldlist'",
        # a string *identity* comparison whose outcome depends on string
        # interning; use equality instead.
        if stack[-1].tagName == 'fieldlist':
            # The new list item is not a field list item (since this
            # is a new list); but it's indented the same as the field
            # list.  This either means that they forgot to indent the
            # list, or they are trying to put something after the
            # field list.  The first one seems more likely, so we'll
            # just warn about that (to avoid confusion).
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tagName in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # Ignore this error if there's text on the same line as
            # the comment-opening quote -- epydoc can't reliably
            # determine the indentation for that line.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Fieldlist should be at the top-level.
            for tok in stack[2:]:
                if tok.tagName != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = doc.createElement(list_type)
        stack[-1].appendChild(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            start = bullet_token.contents.split('.')[:-1]
            # Bug fix: "start" is a *list* of number strings; the
            # original compared it to the string '1', which can never
            # be equal, so every ordered list got an explicit "start"
            # attribute.  Compare the last number instead.
            if start[-1] != '1':
                lst.setAttribute("start", start[-1])

    # Fields are treated somewhat specially: A "fieldlist"
    # node is created to make the parsing simpler, but fields
    # are adjoined directly into the "epytext" node, not into
    # the "fieldlist" node.
    if list_type == 'fieldlist':
        li = doc.createElement("field")
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = doc.createElement("tag")
        tag_elt.appendChild(doc.createTextNode(token_words[0]))
        li.appendChild(tag_elt)

        if len(token_words) > 1:
            arg_elt = doc.createElement("arg")
            arg_elt.appendChild(doc.createTextNode(token_words[1]))
            li.appendChild(arg_elt)
    else:
        li = doc.createElement("li")
        if list_type == 'olist':
            li.setAttribute("bullet", bullet_token.contents)

    # Add the bullet.
    stack[-1].appendChild(li)
    stack.append(li)
    indent_stack.append(None)
464 465 466 ################################################## 467 ## Tokenization 468 ################################################## 469
class Token:
    """
    C{Token}s are an intermediate data structure used while
    constructing the structuring DOM tree for a formatted docstring.
    There are five types of C{Token}:

      - Paragraphs
      - Literal blocks
      - Doctest blocks
      - Headings
      - Bullets

    The text contained in each C{Token} is stored in the C{contents}
    variable.  The string in this variable has been normalized: for
    paragraphs, it has been converted into a single line of text, with
    newline/indentation replaced by single spaces; for literal blocks
    and doctest blocks, the appropriate amount of leading whitespace
    has been removed from each line.

    Each C{Token} has an indentation level associated with it, stored
    in the C{indent} variable.  This indentation level is used by the
    structuring procedure to assemble hierarchical blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type.  Possible values are C{Token.PARA}
        (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK}
        (doctest block), C{Token.HEADING}, and C{Token.BULLET}.
    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins.  This
        line number is only used for issuing errors.
    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.
    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in
        number of leading spaces).  A value of C{None} indicates an
        unknown indentation; this is used for list items and fields
        that begin with one-line paragraphs.
    @type level: C{int} or C{None}
    @ivar level: The heading-level of this C{Token} if it is a
        heading; C{None}, otherwise.  Valid heading levels are 0, 1,
        and 2.
    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s.  This C{tag}
        value is also used for field tag C{Token}s, since fields
        function syntactically the same as list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces).  A value of C{None} indicates an
            unknown indentation.
        @type indent: C{int} or C{None}
        @param level: The heading-level of this C{Token} if it is a
            heading; C{None}, otherwise.
        @type level: C{int} or C{None}
        """
        self.tag = tag
        self.startline = startline
        self.contents = contents
        self.indent = indent
        self.level = level

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}, e.g.::
            <Token: para at line 12>
        """
        return '<Token: %s at line %s>' % (self.tag, self.startline)

    def to_dom(self, doc):
        """
        @param doc: The DOM document, used as an element factory.
        @return: a DOM representation of this C{Token}: an element
            named after this token's C{tag}, containing a single text
            node holding this token's C{contents}.
        @rtype: L{xml.dom.minidom.Element}
        """
        elt = doc.createElement(self.tag)
        elt.appendChild(doc.createTextNode(self.contents))
        return elt
578 579 # Construct regular expressions for recognizing bullets. These are 580 # global so they don't have to be reconstructed each time we tokenize 581 # a docstring. 582 _ULIST_BULLET = '[-]( +|$)' 583 _OLIST_BULLET = '(\d+[.])+( +|$)' 584 _FIELD_BULLET = '@\w+( [^{}:\n]+)?:( +|$)' 585 _BULLET_RE = re.compile(_ULIST_BULLET + '|' + 586 _OLIST_BULLET + '|' + 587 _FIELD_BULLET) 588 _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET) 589 _FIELD_BULLET_RE = re.compile(_FIELD_BULLET) 590 del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET 591
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the doctest block.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @return: The line number of the first line following the doctest
        block.
    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # If the block dedents past block_indent, track the minimum
    # indentation seen; it determines how much leading whitespace is
    # stripped from every line of the block.
    min_indent = block_indent

    lineno = start + 1
    while lineno < len(lines):
        # Find the indentation of this line.
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        # A blank line ends the doctest block.
        if indent == len(line):
            break

        # Dedenting past block_indent is an error; tolerate it by
        # widening min_indent so no text is lost.
        if indent < block_indent:
            min_indent = min(min_indent, indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, lineno))

        lineno += 1

    # Build the token from the block's lines (common leading
    # indentation removed) and return the index of the next line.
    contents = '\n'.join([ln[min_indent:] for ln in lines[start:lineno]])
    tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
    return lineno
646
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the literal block.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the literal
        block.
    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    lineno = start + 1
    while lineno < len(lines):
        # Find the indentation of this line.
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        # A dedent to (or past) block_indent ends the literal block;
        # blank lines are ignored, since they carry no indentation.
        if len(line) != indent and indent <= block_indent:
            break

        lineno += 1

    # Strip one extra column of indentation (the block is indented
    # relative to the paragraph introducing it), trim leading and
    # trailing blank lines, and build the token.
    contents = '\n'.join([ln[block_indent+1:] for ln in lines[start:lineno]])
    contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
    tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
    return lineno
693
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}.  C{bullet_indent} should be the indentation of the
    list item.  Any errors generated while tokenizing will be
    appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}.  This is
        the indentation of the list item.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.
    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    lineno = start + 1
    para_indent = None
    doublecolon = lines[start].rstrip()[-2:] == '::'

    # Find where the bullet ends and the paragraph text begins.
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while lineno < len(lines):
        # Find the indentation of this line.
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        # A "::" at the end of the previous line ends the paragraph
        # (a literal block follows).
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # A blank line ends the token.
        if indent == len(line): break

        # Dedenting past bullet_indent ends the list item.
        if indent < bullet_indent: break

        # A line beginning with a bullet ends the token.
        if _BULLET_RE.match(line, indent): break

        # The second line fixes the paragraph's indentation; any
        # later change in indentation ends the token.
        if para_indent is None: para_indent = indent
        if indent != para_indent: break

        lineno += 1

    # Add the bullet token.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent))

    # Add the paragraph token, if the paragraph is nonempty.
    pcontents = ([lines[start][para_start:].strip()] +
                 [ln.strip() for ln in lines[start+1:lineno]])
    pcontents = ' '.join(pcontents).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent))

    # Return the line number after the paragraph token ends.
    return lineno
768
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} containing the paragraph starting at
    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    should be the indentation of the paragraph.  Any errors
    generated while tokenizing the paragraph will be appended to
    C{errors}.  If the "paragraph" turns out to be a heading (a line
    of text underlined with a row of heading characters), a heading
    token is appended instead.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}.  This is
        the indentation of the paragraph.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the
        paragraph.
    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    lineno = start + 1
    doublecolon = 0
    while lineno < len(lines):
        # Find the indentation of this line.
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        # A "::" marker ends the paragraph (a literal block follows).
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # Blank lines, indentation changes, and list bullets all end
        # the paragraph.
        if indent == len(line): break
        if indent != para_indent: break
        if _BULLET_RE.match(line, indent): break

        # Check for mal-formatted field items.
        if line[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, lineno, is_fatal=0))

        lineno += 1

    contents = [ln.strip() for ln in lines[start:lineno]]

    # Does this token look like a heading?  (A line of text over a
    # run of identical underline characters of similar length.)
    if ((len(contents) < 2) or
        (contents[1][0] not in _HEADING_CHARS) or
        (abs(len(contents[0])-len(contents[1])) > 5)):
        looks_like_heading = 0
    else:
        looks_like_heading = 1
        for char in contents[1]:
            if char != contents[1][0]:
                looks_like_heading = 0
                break

    if looks_like_heading:
        if len(contents[0]) != len(contents[1]):
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            return start+2

    # Add the paragraph token, and return the line number after it.
    contents = ' '.join(contents)
    tokens.append(Token(Token.PARA, start, contents, para_indent))
    return lineno
853
def _tokenize(str, errors):
    """
    Split a given formatted docstring into an ordered list of
    C{Token}s, according to the epytext markup rules.

    @param str: The epytext string
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @type errors: C{list} of L{ParseError}
    @return: a list of the C{Token}s that make up the given string.
    @rtype: C{list} of L{Token}
    """
    tokens = []
    lines = str.split('\n')

    # Scan through the lines, determining what type of token we're
    # dealing with, and tokenizing it, as appropriate.
    lineno = 0
    while lineno < len(lines):
        # Get the current line and its indentation.
        line = lines[lineno]
        indent = len(line)-len(line.lstrip())

        if indent == len(line):
            # Skip blank lines.
            lineno += 1
            continue
        elif line[indent:indent+4] == '>>> ':
            # Lines beginning with ">>> " start doctest blocks.
            lineno = _tokenize_doctest(lines, lineno, indent,
                                       tokens, errors)
        elif _BULLET_RE.match(line, indent):
            # Lines beginning with a bullet start list items.
            lineno = _tokenize_listart(lines, lineno, indent,
                                       tokens, errors)
            if tokens[-1].indent is not None:
                indent = tokens[-1].indent
        else:
            # Check for mal-formatted field items.
            if line[indent] == '@':
                estr = "Possible mal-formatted field item."
                errors.append(TokenizationError(estr, lineno, is_fatal=0))

            # Anything else is either a paragraph or a heading.
            lineno = _tokenize_para(lines, lineno, indent, tokens, errors)

        # A paragraph token ending in '::' introduces a literal block.
        if (tokens[-1].tag == Token.PARA and
            tokens[-1].contents[-2:] == '::'):
            tokens[-1].contents = tokens[-1].contents[:-1]
            lineno = _tokenize_literal(lines, lineno, indent, tokens, errors)

    return tokens

##################################################
## Inline markup ("colorizing")
##################################################

# Assorted regular expressions used for colorizing.

# Matches a single open or close curly brace -- the delimiters that
# _colorize scans for.
_BRACE_RE = re.compile(r'{|}')

# Splits the contents of a link-type region ("name <target>") into an
# optional display name (group 1) and the target (group 2).  Raw
# string fixes the invalid '\s' escape in the original, which raises a
# DeprecationWarning/SyntaxWarning on modern Python.
_TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')

def _colorize(doc, token, errors, tagName='para'):
    """
    Given a string containing the contents of a paragraph, produce a
    DOM C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    DOM C{Text}s.

    @param doc: The DOM C{Document} used to create the new nodes.
    @param token: The C{Token} whose contents should be colorized.
    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of C{string}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @returntype: C{Element}
    """
    str = token.contents
    linenum = 0  # (currently unused)

    # Maintain a stack of DOM elements, containing the ancestors of
    # the text currently being analyzed.  New elements are pushed when
    # "{" is encountered, and old elements are popped when "}" is
    # encountered.
    stack = [doc.createElement(tagName)]

    # This is just used to make error-reporting friendlier.  It's a
    # stack parallel to "stack" containing the index of each element's
    # open brace.
    openbrace_stack = [0]

    # Process the string, scanning for '{' and '}'s.  start is the
    # index of the first unprocessed character.  Each time through the
    # loop, we process the text from the first unprocessed character
    # to the next open or close brace.
    start = 0
    while 1:
        match = _BRACE_RE.search(str, start)
        if match == None: break
        end = match.start()

        # Open braces start new colorizing elements.  When preceeded
        # by a capital letter, they specify a colored region, as
        # defined by the _COLORIZING_TAGS dictionary.  Otherwise,
        # use a special "literal braces" element (with tag "litbrace"),
        # and convert them to literal braces once we find the matching
        # close-brace.
        if match.group() == '{':
            if (end>0) and 'A' <= str[end-1] <= 'Z':
                # "X{...": the capital letter is part of the markup,
                # so exclude it from the preceding text node.
                if (end-1) > start:
                    stack[-1].appendChild(doc.createTextNode(str[start:end-1]))
                if not _COLORIZING_TAGS.has_key(str[end-1]):
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(doc.createElement('unknown'))
                else:
                    tag = _COLORIZING_TAGS[str[end-1]]
                    stack.append(doc.createElement(tag))
            else:
                # A bare "{" opens a literal-brace region.
                if end > start:
                    stack[-1].appendChild(doc.createTextNode(str[start:end]))
                stack.append(doc.createElement('litbrace'))
            openbrace_stack.append(end)
            stack[-2].appendChild(stack[-1])

        # Close braces end colorizing elements.
        elif match.group() == '}':
            # Check for (and ignore) unbalanced braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            # Add any remaining text.
            if end > start:
                stack[-1].appendChild(doc.createTextNode(str[start:end]))

            # Special handling for symbols: an S{...} region must hold
            # exactly one text child naming a known symbol.
            if stack[-1].tagName == 'symbol':
                if (len(stack[-1].childNodes) != 1 or
                    not isinstance(stack[-1].childNodes[0], Text)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = stack[-1].childNodes[0].data
                    if _SYMBOLS.has_key(symb):
                        # It's a symbol
                        symbol = doc.createElement('symbol')
                        stack[-2].removeChild(stack[-1])
                        stack[-2].appendChild(symbol)
                        symbol.appendChild(doc.createTextNode(symb))
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for escape elements: E{...} is either a
            # named escape or a single-character escape.
            if stack[-1].tagName == 'escape':
                if (len(stack[-1].childNodes) != 1 or
                    not isinstance(stack[-1].childNodes[0], Text)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = stack[-1].childNodes[0].data
                    if _ESCAPES.has_key(escp):
                        # It's an escape from _ESCAPES
                        stack[-2].removeChild(stack[-1])
                        escp = _ESCAPES[escp]
                        stack[-2].appendChild(doc.createTextNode(escp))
                    elif len(escp) == 1:
                        # It's a single-character escape (eg E{.})
                        stack[-2].removeChild(stack[-1])
                        stack[-2].appendChild(doc.createTextNode(escp))
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for literal braces elements: replace
            # the temporary element with "{", its children, and "}".
            if stack[-1].tagName == 'litbrace':
                variables = stack[-1].childNodes
                stack[-2].removeChild(stack[-1])
                stack[-2].appendChild(doc.createTextNode('{'))
                for child in variables:
                    stack[-2].appendChild(child)
                stack[-2].appendChild(doc.createTextNode('}'))

            # Special handling for graphs:
            if stack[-1].tagName == 'graph':
                _colorize_graph(doc, stack[-1], token, end, errors)

            # Special handling for link-type elements:
            if stack[-1].tagName in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, stack[-1], token, end, errors)

            # Pop the completed element.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any final text.
    if start < len(str):
        stack[-1].appendChild(doc.createTextNode(str[start:]))

    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]

GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']

def _colorize_graph(doc, graph, token, end, errors):
    """
    Parse the body of a C{G{...}} region into a graph element whose
    first text child is the (lower-cased) graph type and whose
    remaining text children are its arguments.  Eg::
        G{classtree}
        G{classtree x, y, z}
        G{importgraph}
    """
    problem = False

    # Detach the raw contents; they get replaced by parsed pieces.
    old_children = graph.childNodes[:]
    for node in old_children:
        graph.removeChild(node)

    if len(old_children) == 1 and isinstance(old_children[0], Text):
        pieces = old_children[0].data.split(None, 1)
        graphtype = pieces[0].replace(':', '').strip().lower()
        if graphtype not in GRAPH_TYPES:
            problem = ("Bad graph type %s -- use one of %s" %
                       (pieces[0], ', '.join(GRAPH_TYPES)))
        elif len(pieces) == 1:
            args = []
        elif re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]):
            # Arguments may be separated by commas and/or whitespace.
            args = pieces[1].replace(',', ' ').replace(':', '').split()
        else:
            problem = "Bad graph arg list"
    else:
        problem = "Bad graph specification"

    if problem:
        errors.append(ColorizingError(problem, token, end))
        # Leave behind a well-formed placeholder element.
        graph.appendChild(doc.createTextNode('none'))
        graph.appendChild(doc.createTextNode(''))
        return

    graph.appendChild(doc.createTextNode(graphtype))
    for arg in args:
        graph.appendChild(doc.createTextNode(arg))
1110 1162 1163 ################################################## 1164 ## Formatters 1165 ################################################## 1166
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    @param tree: A DOM document encoding of an epytext string.
    @type tree: L{xml.dom.minidom.Document}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, Document):
        return to_epytext(tree.childNodes[0], indent, seclevel)
    if isinstance(tree, Text):
        # Braces in text are temporarily encoded as the control
        # characters \0 and \1 so they can be escaped as E{lb}/E{rb}
        # (or restored literally) by the enclosing block below.
        str = re.sub(r'\{', '\0', tree.data)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tagName == 'epytext': indent -= 2
    if tree.tagName == 'section': seclevel += 1
    # Render all children first; most branches below only wrap them.
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.childNodes]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back)
    # (\2 is the control character used below to mark literal blocks.)
    childstr = re.sub(':(\s*)\2', '::\\1', childstr)

    if tree.tagName == 'para':
        str = wordwrap(childstr, indent)+'\n'
        # Escape characters that would re-tokenize as markup at the
        # start of a line (ordered-list numbers, bullets, fields).
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        return str
    elif tree.tagName == 'li':
        # Use the recorded bullet if one was stored on the element.
        bulletAttr = tree.getAttributeNode('bullet')
        if bulletAttr: bullet = bulletAttr.value
        else: bullet = '-'
        return indent*' '+ bullet + ' ' + childstr.lstrip()
    elif tree.tagName == 'heading':
        str = re.sub('\0', 'E{lb}',childstr)
        str = re.sub('\1', 'E{rb}', str)
        # Underline with the character for this section depth.
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
    elif tree.tagName == 'doctestblock':
        # Restore literal braces; doctest text is not escaped.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['  '+indent*' '+line for line in str.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tagName == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [(indent+1)*' '+line for line in str.split('\n')]
        # Prefix with \2 so the parent para can restore its "::".
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tagName == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tagName == 'target':
        return '<%s>' % childstr
    elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext',
                          'section', 'olist', 'ulist', 'name'):
        # Pure containers: pass the rendered children through.
        return childstr
    elif tree.tagName == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tagName == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline colorizing regions map back to their one-letter tags.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tagName:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tagName)
1257
def to_plaintext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext to a string representation.
    This representation is similar to the string generated by
    C{to_epytext}, but C{to_plaintext} removes inline markup, prints
    escaped characters in unescaped form, etc.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: L{xml.dom.minidom.Document}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, Document):
        return to_plaintext(tree.childNodes[0], indent, seclevel)
    if isinstance(tree, Text): return tree.data

    if tree.tagName == 'section': seclevel += 1

    # Figure out the child indent level.
    if tree.tagName == 'epytext': cindent = indent
    elif tree.tagName == 'li' and tree.getAttributeNode('bullet'):
        # Indent list bodies past the bullet and its trailing space.
        cindent = indent + 1 + len(tree.getAttributeNode('bullet').value)
    else:
        cindent = indent + 2
    variables = [to_plaintext(c, cindent, seclevel) for c in tree.childNodes]
    childstr = ''.join(variables)

    if tree.tagName == 'para':
        return wordwrap(childstr, indent)+'\n'
    elif tree.tagName == 'li':
        # We should be able to use getAttribute here; but there's no
        # convenient way to test if an element has an attribute..
        bulletAttr = tree.getAttributeNode('bullet')
        if bulletAttr: bullet = bulletAttr.value
        else: bullet = '-'
        return indent*' ' + bullet + ' ' + childstr.lstrip()
    elif tree.tagName == 'heading':
        # Underline headings with the character for this depth.
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ((indent-2)*' ' + childstr + '\n' +
                (indent-2)*' ' + uline + '\n')
    elif tree.tagName == 'doctestblock':
        lines = [(indent+2)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tagName == 'literalblock':
        lines = [(indent+1)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tagName == 'fieldlist':
        return childstr
    elif tree.tagName == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tagName == 'uri':
        # A uri element has exactly two children: name and target.
        if len(variables) != 2: raise ValueError('Bad URI ')
        elif variables[0] == variables[1]: return '<%s>' % variables[1]
        else: return '%r<%s>' % (variables[0], variables[1])
    elif tree.tagName == 'link':
        # Only the link's name is shown in plaintext.
        if len(variables) != 2: raise ValueError('Bad Link')
        return '%s' % variables[0]
    elif tree.tagName in ('olist', 'ulist'):
        # [xx] always use condensed lists.
        ## Use a condensed list if each list item is 1 line long.
        #for child in variables:
        #    if child.count('\n') > 2: return childstr
        return childstr.replace('\n\n', '\n')+'\n'
    elif tree.tagName == 'symbol':
        return '%s' % childstr
    elif tree.tagName == 'graph':
        return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
    else:
        # Assume that anything else can be passed through.
        return childstr
1342
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: L{xml.dom.minidom.Document}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, Document):
        return to_debug(tree.childNodes[0], indent, seclevel)
    if isinstance(tree, Text):
        # Braces are temporarily encoded as \0/\1 (see to_epytext).
        str = re.sub(r'\{', '\0', tree.data)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tagName == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.childNodes]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back)
    childstr = re.sub(':( *\n     \|\n)\2', '::\\1', childstr)

    # Each block type is labelled in a six-character left margin
    # (e.g. "   P>|", " LIT>|"); continuation lines use "     |".
    if tree.tagName == 'para':
        str = wordwrap(childstr, indent-6, 69)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        lines = str.rstrip().split('\n')
        lines[0] = '   P>|' + lines[0]
        lines[1:] = ['     |'+l for l in lines[1:]]
        return '\n'.join(lines)+'\n     |\n'
    elif tree.tagName == 'li':
        bulletAttr = tree.getAttributeNode('bullet')
        if bulletAttr: bullet = bulletAttr.value
        else: bullet = '-'
        return '  LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
    elif tree.tagName in ('olist', 'ulist'):
        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
    elif tree.tagName == 'heading':
        str = re.sub('\0', 'E{lb}', childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
                '     |'+(indent-8)*' ' + uline + '\n')
    elif tree.tagName == 'doctestblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-4)*' '+line for line in str.split('\n')]
        # Replace the first line's plain margin with the block label.
        lines[0] = 'DTST>'+lines[0][5:]
        return '\n'.join(lines) + '\n     |\n'
    elif tree.tagName == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-5)*' '+line for line in str.split('\n')]
        lines[0] = ' LIT>'+lines[0][5:]
        # The leading \2 lets the parent para restore its "::".
        return '\2' + '\n'.join(lines) + '\n     |\n'
    elif tree.tagName == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tagName == 'target':
        return '<%s>' % childstr
    elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext',
                          'section', 'olist', 'ulist', 'name'):
        # Pure containers (olist/ulist are already handled above).
        return childstr
    elif tree.tagName == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tagName == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tagName:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tagName)
1436 1437 ################################################## 1438 ## Top-Level Wrapper function 1439 ##################################################
def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: L{xml.dom.minidom.Document}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    errors = []
    # Bug fix: initialize these before the try block.  If parse()
    # raises, the old code left `warnings` (and `val`) unbound, so the
    # code below died with a NameError instead of re-raising the
    # original exception.
    warnings = []
    val = None
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except:
        # Intentionally broad: the exception is re-raised below, after
        # any collected warnings/errors have been displayed.
        confused = 1

    if not show_warnings: warnings = []
    warnings.sort()
    errors.sort()
    if warnings:
        print >>stream, '='*SCRWIDTH
        print >>stream, "WARNINGS"
        print >>stream, '-'*SCRWIDTH
        for warning in warnings:
            print >>stream, warning.as_warning()
        print >>stream, '='*SCRWIDTH
    if errors and show_errors:
        if not warnings: print >>stream, '='*SCRWIDTH
        print >>stream, "ERRORS"
        print >>stream, '-'*SCRWIDTH
        for error in errors:
            print >>stream, error
        print >>stream, '='*SCRWIDTH

    if confused: raise
    elif errors: raise SyntaxError('Encountered Errors')
    else: return val
1491 1492 ################################################## 1493 ## Parse Errors 1494 ################################################## 1495
class TokenizationError(ParseError):
    """
    An error generated while tokenizing a formatted documentation
    string.  (Appended to the error list by C{_tokenize} and the
    C{_tokenize_*} helpers; may be non-fatal, via C{is_fatal=0}.)
    """
1501
class StructuringError(ParseError):
    """
    An error generated while structuring a formatted documentation
    string.  (The structuring phase itself is defined elsewhere in
    this module.)
    """
1507
class ColorizingError(ParseError):
    """
    An error generated while colorizing a paragraph.
    """
    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occured
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occured.
        @type charnum: C{int}
        """
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum

    CONTEXT_RANGE = 20
    def descr(self):
        """
        Return the error description, followed by the offending text
        with a caret marking the error position; long context is
        elided with '...' on either side.
        """
        ctx = self.CONTEXT_RANGE
        text = self.token.contents
        pos = self.charnum

        # Context to the left of the error position.
        if pos <= ctx:
            left = text[:pos]
        else:
            left = '...' + text[pos-ctx:pos]

        # Context to the right of the error position.
        if len(text) - pos <= ctx:
            right = text[pos:]
        else:
            right = text[pos:pos+ctx] + '...'

        return ('%s\n\n%s%s\n%s^' % (self._descr, left, right,
                                     ' '*len(left)))
1541 1542 ################################################## 1543 ## Convenience parsers 1544 ################################################## 1545
def parse_as_literal(str):
    """
    Return a DOM document matching the epytext DTD, containing a
    single literal block.  That literal block will include the
    contents of the given string.  This method is typically used as a
    fall-back when the parser fails.

    @param str: The string which should be enclosed in a literal
        block.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single literal
        block.
    @rtype: L{xml.dom.minidom.Document}
    """
    doc = Document()
    root = doc.createElement('epytext')
    block = doc.createElement('literalblock')
    block.appendChild(doc.createTextNode(str))
    root.appendChild(block)
    doc.appendChild(root)
    return doc
1568
def parse_as_para(str):
    """
    Return a DOM document matching the epytext DTD, containing a
    single paragraph.  That paragraph will include the contents of the
    given string.  This can be used to wrap some forms of
    automatically generated information (such as type names) in
    paragraphs.

    @param str: The string which should be enclosed in a paragraph.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single paragraph.
    @rtype: L{xml.dom.minidom.Document}
    """
    doc = Document()
    root = doc.createElement('epytext')
    para = doc.createElement('para')
    para.appendChild(doc.createTextNode(str))
    root.appendChild(para)
    doc.appendChild(root)
    return doc
1590 1591 ################################################################# 1592 ## SUPPORT FOR EPYDOC 1593 ################################################################# 1594 from epydoc.docwriter.dotgraph import * 1595
def parse_docstring(docstring, errors, **options):
    """
    Parse the given docstring, which is formatted using epytext; and
    return a C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    tree = parse(docstring, errors)
    return ParsedEpytextDocstring(tree)
1610
1611 -class ParsedEpytextDocstring(ParsedDocstring):
1612 SYMBOL_TO_HTML = { 1613 # Symbols 1614 '<-': 'larr', '->': 'rarr', '^': 'uarr', 'v': 'darr', 1615 1616 # Greek letters 1617 'alpha': 'alpha', 'beta': 'beta', 'gamma': 'gamma', 1618 'delta': 'delta', 'epsilon': 'epsilon', 'zeta': 'zeta', 1619 'eta': 'eta', 'theta': 'theta', 'iota': 'iota', 1620 'kappa': 'kappa', 'lambda': 'lambda', 'mu': 'mu', 1621 'nu': 'nu', 'xi': 'xi', 'omicron': 'omicron', 1622 'pi': 'pi', 'rho': 'rho', 'sigma': 'sigma', 1623 'tau': 'tau', 'upsilon': 'upsilon', 'phi': 'phi', 1624 'chi': 'chi', 'psi': 'psi', 'omega': 'omega', 1625 'Alpha': 'Alpha', 'Beta': 'Beta', 'Gamma': 'Gamma', 1626 'Delta': 'Delta', 'Epsilon': 'Epsilon', 'Zeta': 'Zeta', 1627 'Eta': 'Eta', 'Theta': 'Theta', 'Iota': 'Iota', 1628 'Kappa': 'Kappa', 'Lambda': 'Lambda', 'Mu': 'Mu', 1629 'Nu': 'Nu', 'Xi': 'Xi', 'Omicron': 'Omicron', 1630 'Pi': 'Pi', 'Rho': 'Rho', 'Sigma': 'Sigma', 1631 'Tau': 'Tau', 'Upsilon': 'Upsilon', 'Phi': 'Phi', 1632 'Chi': 'Chi', 'Psi': 'Psi', 'Omega': 'Omega', 1633 1634 # HTML character entities 1635 'larr': 'larr', 'rarr': 'rarr', 'uarr': 'uarr', 1636 'darr': 'darr', 'harr': 'harr', 'crarr': 'crarr', 1637 'lArr': 'lArr', 'rArr': 'rArr', 'uArr': 'uArr', 1638 'dArr': 'dArr', 'hArr': 'hArr', 1639 'copy': 'copy', 'times': 'times', 'forall': 'forall', 1640 'exist': 'exist', 'part': 'part', 1641 'empty': 'empty', 'isin': 'isin', 'notin': 'notin', 1642 'ni': 'ni', 'prod': 'prod', 'sum': 'sum', 1643 'prop': 'prop', 'infin': 'infin', 'ang': 'ang', 1644 'and': 'and', 'or': 'or', 'cap': 'cap', 'cup': 'cup', 1645 'int': 'int', 'there4': 'there4', 'sim': 'sim', 1646 'cong': 'cong', 'asymp': 'asymp', 'ne': 'ne', 1647 'equiv': 'equiv', 'le': 'le', 'ge': 'ge', 1648 'sub': 'sub', 'sup': 'sup', 'nsub': 'nsub', 1649 'sube': 'sube', 'supe': 'supe', 'oplus': 'oplus', 1650 'otimes': 'otimes', 'perp': 'perp', 1651 1652 # Alternate (long) names 1653 'infinity': 'infin', 'integral': 'int', 'product': 'prod', 1654 '<=': 'le', '>=': 'ge', 1655 } 1656 1657 SYMBOL_TO_LATEX = { 
1658 # Symbols 1659 '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)', 1660 '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)', 1661 1662 # Greek letters (use lower case when upcase not available) 1663 1664 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma': 1665 r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon': 1666 r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)', 1667 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa': 1668 r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)', 1669 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi': 1670 r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau': 1671 r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)', 1672 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega': 1673 r'\(\omega\)', 1674 1675 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma': 1676 r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon': 1677 r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)', 1678 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa': 1679 r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)', 1680 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi': 1681 r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau': 1682 r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)', 1683 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega': 1684 r'\(\Omega\)', 1685 1686 # HTML character entities 1687 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr': 1688 r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr': 1689 r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)', 1690 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr': 1691 r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr': 1692 r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}', 1693 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist': 1694 r'\(\exists\)', 'part': r'\(\partial\)', 'empty': 1695 r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)', 1696 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 
'sum': r'\(\sum\)', 1697 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang': 1698 r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap': 1699 r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4': 1700 r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)', 1701 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv': 1702 r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub': 1703 r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)', 1704 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus': 1705 r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)', 1706 1707 # Alternate (long) names 1708 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product': 1709 r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)', 1710 } 1711
1712 - def __init__(self, dom_tree):
1713 if isinstance(dom_tree, Document): 1714 dom_tree = dom_tree.childNodes[0] 1715 self._tree = dom_tree 1716 # Caching: 1717 self._html = self._latex = self._plaintext = None 1718 self._terms = None
1719
1720 - def to_html(self, docstring_linker, directory=None, docindex=None, 1721 context=None, **options):
1722 if self._html is not None: return self._html 1723 if self._tree is None: return '' 1724 indent = options.get('indent', 0) 1725 self._html = self._to_html(self._tree, docstring_linker, directory, 1726 docindex, context, indent) 1727 return self._html
1728
1729 - def to_latex(self, docstring_linker, **options):
1730 if self._latex is not None: return self._latex 1731 if self._tree is None: return '' 1732 indent = options.get('indent', 0) 1733 self._hyperref = options.get('hyperref', 1) 1734 self._latex = self._to_latex(self._tree, docstring_linker, indent) 1735 return self._latex
1736
1737 - def to_plaintext(self, docstring_linker, **options):
1738 # [XX] don't cache -- different options might be used!! 1739 #if self._plaintext is not None: return self._plaintext 1740 if self._tree is None: return '' 1741 if 'indent' in options: 1742 self._plaintext = to_plaintext(self._tree, 1743 indent=options['indent']) 1744 else: 1745 self._plaintext = to_plaintext(self._tree) 1746 return self._plaintext
1747
1748 - def _index_term_key(self, tree):
1749 str = to_plaintext(tree) 1750 str = re.sub(r'\s\s+', '-', str) 1751 return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1752
    def _to_html(self, tree, linker, directory, docindex, context,
                 indent=0, seclevel=0):
        """
        Recursively render C{tree} (a DOM element or Text node) as an
        HTML string.  C{linker} resolves cross-references; C{directory}
        is where generated graph images are written.
        """
        if isinstance(tree, Text):
            return plaintext_to_html(tree.data)

        if tree.tagName == 'epytext': indent -= 2
        if tree.tagName == 'section': seclevel += 1

        # Process the variables first.
        variables = [self._to_html(c, linker, directory, docindex, context,
                                   indent+2, seclevel)
                     for c in tree.childNodes]

        # Get rid of unnecessary <P>...</P> tags; they introduce extra
        # space on most browsers that we don't want.
        # (The slice [5+indent:-5] strips the wordwrapped indent plus
        # the "<p>" prefix and "</p>\n" suffix added below.)
        for i in range(len(variables)-1):
            if (not isinstance(tree.childNodes[i], Text) and
                tree.childNodes[i].tagName == 'para' and
                (isinstance(tree.childNodes[i+1], Text) or
                 tree.childNodes[i+1].tagName != 'para')):
                variables[i] = ' '*(indent+2)+variables[i][5+indent:-5]+'\n'
        if (tree.hasChildNodes() and
            not isinstance(tree.childNodes[-1], Text) and
            tree.childNodes[-1].tagName == 'para'):
            variables[-1] = ' '*(indent+2)+variables[-1][5+indent:-5]+'\n'

        # Construct the HTML string for the variables.
        childstr = ''.join(variables)

        # Perform the appropriate action for the DOM tree type.
        if tree.tagName == 'para':
            return wordwrap('<p>%s</p>' % childstr, indent)
        elif tree.tagName == 'code':
            return '<code>%s</code>' % childstr
        elif tree.tagName == 'uri':
            # Children are [name, target].
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tagName == 'link':
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tagName == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tagName == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tagName == 'indexed':
            # Wrap a copy of the term in an 'epytext' root so the
            # linker can render it as a standalone docstring.
            term = tree.cloneNode(1)
            term.tagName = 'epytext'
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
            #term_key = self._index_term_key(tree)
            #return linker.translate_indexterm(childstr, term_key)
        elif tree.tagName == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tagName == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tagName == 'olist':
            # Honor an explicit numbering start, if recorded.
            startAttr = tree.getAttributeNode('start')
            if startAttr: start = ' start="%s"' % startAttr.value
            else: start = ''
            return ('%s<ol%s>\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tagName == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tagName == 'heading':
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tagName == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tagName == 'doctestblock':
            dtb = colorize_doctestblock(childstr.strip())
            return '<pre class="doctestblock">\n%s</pre>\n' % dtb
        elif tree.tagName == 'fieldlist':
            # Field lists are split out before rendering reaches here.
            raise AssertionError("There should not be any field lists left")
        elif tree.tagName in ('epytext', 'section', 'tag', 'arg',
                              'name', 'target', 'html'):
            # Pure containers: pass the rendered children through.
            return childstr
        elif tree.tagName == 'symbol':
            symbol = tree.childNodes[0].data
            if self.SYMBOL_TO_HTML.has_key(symbol):
                return '&%s;' % self.SYMBOL_TO_HTML[symbol]
            else:
                return '[??]'
        elif tree.tagName == 'graph':
            # Generate the graph.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            # Write the graph.
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tagName)
1844 1845 #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph']
1846 - def _build_graph(self, graph_type, graph_args, linker, 1847 docindex, context):
1848 # Generate the graph 1849 if graph_type == 'classtree': 1850 if graph_args: 1851 bases = [docindex.find(name, context) 1852 for name in graph_args] 1853 elif isinstance(context, ClassDoc): 1854 bases = [context] 1855 else: 1856 log.warning("Could not construct class tree: you must " 1857 "specify one or more base classes.") 1858 return None 1859 return class_tree_graph(bases, linker, context) 1860 elif graph_type == 'packagetree': 1861 if graph_args: 1862 packages = [docindex.find(name, context) 1863 for name in graph_args] 1864 elif isinstance(context, ModuleDoc): 1865 packages = [context] 1866 else: 1867 log.warning("Could not construct package tree: you must " 1868 "specify one or more root packages.") 1869 return None 1870 return package_tree_graph(packages, linker, context) 1871 elif graph_type == 'importgraph': 1872 modules = [d for d in docindex.root if isinstance(d, ModuleDoc)] 1873 return import_graph(modules, docindex, linker, context) 1874 1875 elif graph_type == 'callgraph': 1876 if graph_args: 1877 docs = [docindex.find(name, context) for name in graph_args] 1878 docs = [doc for doc in docs if doc is not None] 1879 else: 1880 docs = [context] 1881 return call_graph(docs, docindex, linker, context) 1882 else: 1883 log.warning("Unknown graph type %s" % graph_type)
1884 1885
1886 - def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
1887 if isinstance(tree, Text): 1888 return plaintext_to_latex(tree.data, breakany=breakany) 1889 1890 if tree.tagName == 'section': seclevel += 1 1891 1892 # Figure out the child indent level. 1893 if tree.tagName == 'epytext': cindent = indent 1894 else: cindent = indent + 2 1895 variables = [self._to_latex(c, linker, cindent, seclevel, breakany) 1896 for c in tree.childNodes] 1897 childstr = ''.join(variables) 1898 1899 if tree.tagName == 'para': 1900 return wordwrap(childstr, indent)+'\n' 1901 elif tree.tagName == 'code': 1902 return '\\texttt{%s}' % childstr 1903 elif tree.tagName == 'uri': 1904 if len(variables) != 2: raise ValueError('Bad URI ') 1905 if self._hyperref: 1906 # ~ and # should not be escaped in the URI. 1907 uri = tree.childNodes[1].childNodes[0].data 1908 uri = uri.replace('{\\textasciitilde}', '~') 1909 uri = uri.replace('\\#', '#') 1910 if variables[0] == variables[1]: 1911 return '\\href{%s}{\\textit{%s}}' % (uri, variables[1]) 1912 else: 1913 return ('%s\\footnote{\\href{%s}{%s}}' % 1914 (variables[0], uri, variables[1])) 1915 else: 1916 if variables[0] == variables[1]: 1917 return '\\textit{%s}' % variables[1] 1918 else: 1919 return '%s\\footnote{%s}' % (variables[0], variables[1]) 1920 elif tree.tagName == 'link': 1921 if len(variables) != 2: raise ValueError('Bad Link') 1922 return linker.translate_identifier_xref(variables[1], variables[0]) 1923 elif tree.tagName == 'italic': 1924 return '\\textit{%s}' % childstr 1925 elif tree.tagName == 'math': 1926 return '\\textit{%s}' % childstr 1927 elif tree.tagName == 'indexed': 1928 term = tree.cloneNode(1) 1929 term.tagName = 'epytext' 1930 return linker.translate_indexterm(ParsedEpytextDocstring(term)) 1931 elif tree.tagName == 'bold': 1932 return '\\textbf{%s}' % childstr 1933 elif tree.tagName == 'li': 1934 return indent*' ' + '\\item ' + childstr.lstrip() 1935 elif tree.tagName == 'heading': 1936 return ' '*(indent-2) + '(section) %s\n\n' % childstr 1937 elif tree.tagName == 
'doctestblock': 1938 return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr 1939 elif tree.tagName == 'literalblock': 1940 return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr 1941 elif tree.tagName == 'fieldlist': 1942 return indent*' '+'{omitted fieldlist}\n' 1943 elif tree.tagName == 'olist': 1944 return (' '*indent + '\\begin{enumerate}\n\n' + 1945 ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' + 1946 childstr + 1947 ' '*indent + '\\end{enumerate}\n\n') 1948 elif tree.tagName == 'ulist': 1949 return (' '*indent + '\\begin{itemize}\n' + 1950 ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' + 1951 childstr + 1952 ' '*indent + '\\end{itemize}\n\n') 1953 elif tree.tagName == 'symbol': 1954 symbol = tree.childNodes[0].data 1955 if self.SYMBOL_TO_LATEX.has_key(symbol): 1956 return r'%s' % self.SYMBOL_TO_LATEX[symbol] 1957 else: 1958 return '[??]' 1959 elif tree.tagName == 'graph': 1960 return '(GRAPH)' 1961 #raise ValueError, 'graph not implemented yet for latex' 1962 else: 1963 # Assume that anything else can be passed through. 1964 return childstr
1965
1966 - def summary(self):
1967 if self._tree is None: return self 1968 1969 # Is the cloning that happens here safe/proper? (Cloning 1970 # between 2 different documents) 1971 tree = self._tree 1972 1973 doc = Document() 1974 epytext = doc.createElement('epytext') 1975 doc.appendChild(epytext) 1976 1977 # Find the first paragraph. 1978 variables = tree.childNodes 1979 while (len(variables) > 0) and (variables[0].tagName != 'para'): 1980 if variables[0].tagName in ('section', 'ulist', 'olist', 'li'): 1981 variables = variables[0].childNodes 1982 else: 1983 variables = variables[1:] 1984 1985 # Special case: if the docstring contains a single literal block, 1986 # then try extracting the summary from it. 1987 if (len(variables) == 0 and len(tree.childNodes) == 1 and 1988 tree.childNodes[0].tagName == 'literalblock'): 1989 str = re.split(r'\n\s*(\n|$).*', 1990 tree.childNodes[0].childNodes[0].data, 1)[0] 1991 variables = [doc.createElement('para')] 1992 variables[0].appendChild(doc.createTextNode(str)) 1993 1994 # If we didn't find a paragraph, return an empty epytext. 1995 if len(variables) == 0: return ParsedEpytextDocstring(doc) 1996 1997 # Extract the first sentence. 1998 parachildren = variables[0].childNodes 1999 para = doc.createElement('para') 2000 epytext.appendChild(para) 2001 for parachild in parachildren: 2002 if isinstance(parachild, Text): 2003 m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild.data) 2004 if m: 2005 para.appendChild(doc.createTextNode(m.group(1))) 2006 return ParsedEpytextDocstring(doc) 2007 para.appendChild(parachild.cloneNode(1)) 2008 2009 return ParsedEpytextDocstring(doc)
2010
2011 - def split_fields(self, errors=None):
2012 if self._tree is None: return (self, ()) 2013 tree = self._tree.cloneNode(1) # Hmm.. 2014 fields = [] 2015 2016 if (tree.hasChildNodes() and 2017 tree.childNodes[-1].tagName == 'fieldlist' and 2018 tree.childNodes[-1].hasChildNodes()): 2019 field_nodes = tree.childNodes[-1].childNodes 2020 tree.removeChild(tree.childNodes[-1]) 2021 2022 for field in field_nodes: 2023 # Get the tag 2024 tag = field.childNodes[0].childNodes[0].data.lower() 2025 field.removeChild(field.childNodes[0]) 2026 2027 # Get the argument. 2028 if field.childNodes and field.childNodes[0].tagName == 'arg': 2029 arg = field.childNodes[0].childNodes[0].data 2030 field.removeChild(field.childNodes[0]) 2031 else: 2032 arg = None 2033 2034 # Process the field. 2035 field.tagName = 'epytext' 2036 fields.append(Field(tag, arg, ParsedEpytextDocstring(field))) 2037 2038 # Save the remaining docstring as the description.. 2039 if tree.hasChildNodes() and tree.childNodes[0].hasChildNodes(): 2040 descr = tree 2041 else: 2042 descr = None 2043 2044 return ParsedEpytextDocstring(descr), fields
2045
2046 - def index_terms(self):
2047 if self._terms is None: 2048 self._terms = [] 2049 self._index_terms(self._tree, self._terms) 2050 return self._terms
2051
2052 - def _index_terms(self, tree, terms):
2053 if tree is None or isinstance(tree, Text): 2054 return 2055 2056 if tree.tagName == 'indexed': 2057 term = tree.cloneNode(1) 2058 term.tagName = 'epytext' 2059 terms.append(ParsedEpytextDocstring(term)) 2060 2061 # Look for index items in child nodes. 2062 for child in tree.childNodes: 2063 self._index_terms(child, terms)
2064