Package epydoc :: Package markup :: Module epytext
[hide private]
[frames] | [no frames]

Source Code for Module epydoc.markup.epytext

   1  # 
   2  # epytext.py: epydoc formatted docstring parsing 
   3  # Edward Loper 
   4  # 
   5  # Created [04/10/01 12:00 AM] 
   6  # $Id: epytext.py 1210 2006-04-10 13:25:50Z edloper $ 
   7  # 
   8   
   9  """ 
  10  Parser for epytext strings.  Epytext is a lightweight markup whose 
  11  primary intended application is Python documentation strings.  This 
  12  parser converts Epytext strings to a XML/DOM representation.  Epytext 
  13  strings can contain the following X{structural blocks}: 
  14   
  15      - X{epytext}: The top-level element of the DOM tree. 
  16      - X{para}: A paragraph of text.  Paragraphs contain no newlines,  
  17        and all spaces are soft. 
  18      - X{section}: A section or subsection. 
  19      - X{field}: A tagged field.  These fields provide information 
  20        about specific aspects of a Python object, such as the 
  21        description of a function's parameter, or the author of a 
  22        module. 
  23      - X{literalblock}: A block of literal text.  This text should be 
  24        displayed as it would be displayed in plaintext.  The 
  25        parser removes the appropriate amount of leading whitespace  
  26        from each line in the literal block. 
  27      - X{doctestblock}: A block containing sample python code, 
  28        formatted according to the specifications of the C{doctest} 
  29        module. 
  30      - X{ulist}: An unordered list. 
  31      - X{olist}: An ordered list. 
  32      - X{li}: A list item.  This tag is used both for unordered list 
  33        items and for ordered list items. 
  34   
  35  Additionally, the following X{inline regions} may be used within 
  36  C{para} blocks: 
  37       
  38      - X{code}:   Source code and identifiers. 
  39      - X{math}:   Mathematical expressions. 
  40      - X{index}:  A term which should be included in an index, if one 
  41                   is generated. 
  42      - X{italic}: Italicized text. 
  43      - X{bold}:   Bold-faced text. 
  44      - X{uri}:    A Universal Resource Indicator (URI) or Universal 
  45                   Resource Locator (URL) 
  46      - X{link}:   A Python identifier which should be hyperlinked to 
  47                   the named object's documentation, when possible. 
  48   
   49  The returned DOM tree will conform to the following Document Type 
  50  Description:: 
  51   
  52     <!ENTITY % colorized '(code | math | index | italic | 
  53                            bold | uri | link | symbol)*'> 
  54   
  55     <!ELEMENT epytext ((para | literalblock | doctestblock | 
  56                        section | ulist | olist)*, fieldlist?)> 
  57   
  58     <!ELEMENT para (#PCDATA | %colorized;)*> 
  59   
   60     <!ELEMENT section (para | literalblock | doctestblock | 
  61                        section | ulist | olist)+> 
  62   
  63     <!ELEMENT fieldlist (field+)> 
   64     <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock | 
   65                                   ulist | olist)+)> 
  66     <!ELEMENT tag (#PCDATA)> 
  67     <!ELEMENT arg (#PCDATA)> 
  68      
  69     <!ELEMENT literalblock (#PCDATA)> 
  70     <!ELEMENT doctestblock (#PCDATA)> 
  71   
  72     <!ELEMENT ulist (li+)> 
  73     <!ELEMENT olist (li+)> 
  74     <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> 
  75     <!ATTLIST li bullet NMTOKEN #IMPLIED> 
  76     <!ATTLIST olist start NMTOKEN #IMPLIED> 
  77   
  78     <!ELEMENT uri     (name, target)> 
  79     <!ELEMENT link    (name, target)> 
  80     <!ELEMENT name    (#PCDATA | %colorized;)*> 
  81     <!ELEMENT target  (#PCDATA)> 
  82      
  83     <!ELEMENT code    (#PCDATA | %colorized;)*> 
  84     <!ELEMENT math    (#PCDATA | %colorized;)*> 
  85     <!ELEMENT italic  (#PCDATA | %colorized;)*> 
  86     <!ELEMENT bold    (#PCDATA | %colorized;)*> 
  87     <!ELEMENT indexed (#PCDATA | %colorized;)> 
  88   
  89     <!ELEMENT symbol (#PCDATA)> 
  90   
  91  @var SYMBOLS: A list of the of escape symbols that are supported 
  92        by epydoc.  Currently the following symbols are supported: 
  93  <<<SYMBOLS>>> 
  94  """ 
  95  # Note: the symbol list is appended to the docstring automatically, 
  96  # below. 
  97   
  98  __docformat__ = 'epytext en' 
  99   
 100  # Code organization.. 
 101  #   1. parse() 
 102  #   2. tokenize() 
 103  #   3. colorize() 
 104  #   4. helpers 
 105  #   5. testing 
 106   
 107  import re, string, types, sys, os.path 
 108  from xml.dom.minidom import Document, Text 
 109  import xml.dom.minidom 
 110  from epydoc.markup import * 
 111  from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex 
 112  from epydoc.docwriter.html_colorize import colorize_doctestblock 
 113   
 114  ################################################## 
 115  ## Constants 
 116  ################################################## 
 117   
 118  # The possible heading underline characters, listed in order of 
 119  # heading depth.  
 120  _HEADING_CHARS = "=-~" 
 121   
 122  # Escape codes.  These should be needed very rarely. 
 123  _ESCAPES = {'lb':'{', 'rb': '}'} 
 124   
 125  # Symbols.  These can be generated via S{...} escapes. 
 126  SYMBOLS = [ 
 127      # Arrows 
 128      '<-', '->', '^', 'v',  
 129   
 130      # Greek letters 
 131      'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',   
 132      'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',   
 133      'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',   
 134      'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega', 
 135      'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',   
 136      'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',   
 137      'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',   
 138      'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega', 
 139       
 140      # HTML character entities 
 141      'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr', 
 142      'lArr', 'rArr', 'uArr', 'dArr', 'hArr',  
 143      'copy', 'times', 'forall', 'exist', 'part', 
 144      'empty', 'isin', 'notin', 'ni', 'prod', 'sum', 
 145      'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup', 
 146      'int', 'there4', 'sim', 'cong', 'asymp', 'ne', 
 147      'equiv', 'le', 'ge', 'sub', 'sup', 'nsub', 
 148      'sube', 'supe', 'oplus', 'otimes', 'perp', 
 149   
 150      # Alternate (long) names 
 151      'infinity', 'integral', 'product', 
 152      '>=', '<=',  
 153      ] 
 154  # Convert to a dictionary, for quick lookup 
 155  _SYMBOLS = {} 
 156  for symbol in SYMBOLS: _SYMBOLS[symbol] = 1 
 157   
 158  # Add symbols to the docstring. 
 159  symblist = '      ' 
 160  symblist += ';\n      '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol) 
 161                                for symbol in SYMBOLS]) 
 162  __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist) 
 163  del symbol, symblist 
 164   
 165  # Tags for colorizing text. 
 166  _COLORIZING_TAGS = { 
 167      'C': 'code', 
 168      'M': 'math', 
 169      'X': 'indexed', 
 170      'I': 'italic',  
 171      'B': 'bold', 
 172      'U': 'uri', 
 173      'L': 'link',       # A Python identifier that should be linked to  
 174      'E': 'escape',     # escapes characters or creates symbols 
 175      'S': 'symbol', 
 176      'G': 'graph', 
 177      } 
 178   
 179  # Which tags can use "link syntax" (e.g., U{Python<www.python.org>})? 
 180  _LINK_COLORIZING_TAGS = ['link', 'uri'] 
 181   
 182  ################################################## 
 183  ## Structuring (Top Level) 
 184  ################################################## 
 185   
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string,
        or C{None} if a fatal error occurred and C{errors} was given.
    @rtype: L{xml.dom.minidom.Document}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.  (Identity test: "errors == None" would
    # invoke __eq__ on an arbitrary caller-supplied object.)
    if errors is None:
        errors = []
        raise_on_error = 1
    else:
        raise_on_error = 0

    # Preprocess the string: normalize CRLF line endings to LF, and
    # expand tabs (indentation is significant in epytext).  Use the
    # string method rather than the deprecated string-module function.
    str = re.sub('\015\012', '\012', str)
    str = str.expandtabs()

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?
    encountered_field = 0

    # Create an XML document to hold the epytext.
    doc = Document()

    # Maintain two parallel stacks: one contains DOM elements, and
    # gives the ancestors of the current block.  The other contains
    # indentation values, and gives the indentation of the
    # corresponding DOM elements.  An indentation of "None" reflects
    # an unknown indentation.  However, the indentation must be
    # greater than, or greater than or equal to, the indentation of
    # the prior element (depending on what type of DOM element it
    # corresponds to).  No 2 consecutive indent_stack values will
    # ever be "None."  Use initial dummy elements in the stack, so we
    # don't have to worry about bounds checking.
    stack = [None, doc.createElement('epytext')]
    indent_stack = [-1, None]

    for token in tokens:
        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        # Add the token to the tree, according to its type.
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)
        elif token.tag == Token.LBLOCK:
            stack[-1].appendChild(token.to_dom(doc))
        elif token.tag == Token.DTBLOCK:
            stack[-1].appendChild(token.to_dom(doc))
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            assert 0, 'Unknown token type: '+token.tag

        # Check if the DOM element we just added was a field; fields
        # must be the last thing in the docstring.
        if stack[-1].tagName == 'field':
            encountered_field = 1
        elif encountered_field == 1:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "+
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # If there was a fatal error, then signal it!
    if len([e for e in errors if e.is_fatal()]) > 0:
        if raise_on_error:
            raise errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    doc.appendChild(stack[1])
    return doc
287
def _pop_completed_blocks(token, stack, indent_stack):
    """
    Pop any completed blocks off the stack.  This includes any
    blocks that we have dedented past, as well as any list item
    blocks that we've dedented to.  The top element on the stack
    should only be a list if we're about to start a new list
    item (i.e., if the next token is a bullet).
    """
    indent = token.indent
    # Identity tests for None (the original used ==/!=, which would
    # call __eq__ needlessly; indents are ints or None).
    if indent is not None:
        while (len(stack) > 2):
            pop = 0

            # Dedent past a block.
            if indent_stack[-1] is not None and indent < indent_stack[-1]:
                pop = 1
            elif indent_stack[-1] is None and indent < indent_stack[-2]:
                pop = 1

            # Dedent to a list item, if it is followed by another list
            # item with the same indentation.
            elif (token.tag == 'bullet' and indent == indent_stack[-2] and
                  stack[-1].tagName in ('li', 'field')):
                pop = 1

            # End of a list (no more list items available)
            elif (stack[-1].tagName in ('ulist', 'olist') and
                  (token.tag != 'bullet' or token.contents[-1] == ':')):
                pop = 1

            # Pop the block, if it's complete.  Otherwise, we're done.
            if pop == 0: return
            stack.pop()
            indent_stack.pop()
319
def _add_para(doc, para_token, stack, indent_stack, errors):
    """
    Colorize the given paragraph token and append the resulting DOM
    element to the innermost open block (C{stack[-1]}).

    @param doc: The DOM document, used as an element factory.
    @param para_token: The C{Token.PARA} token to add.
    @param stack: The stack of open DOM blocks.
    @param indent_stack: Indentations of the elements of C{stack}.
    @param errors: A list to which L{StructuringError}s are appended.
    """
    # An unknown parent indentation is resolved by the first
    # paragraph inside it.  (Identity test for None, not "==".)
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent
    if para_token.indent == indent_stack[-1]:
        # Colorize the paragraph and add it.
        para = _colorize(doc, para_token, errors)
        stack[-1].appendChild(para)
    else:
        estr = "Improper paragraph indentation."
        errors.append(StructuringError(estr, para_token.startline))
333
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """Add a new section to the DOM tree, with the given heading."""
    # An unknown parent indentation is resolved by the heading.
    # (Identity test for None, not "==".)
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        estr = "Improper heading indentation."
        errors.append(StructuringError(estr, heading_token.startline))

    # Check for errors: headings may only be nested directly inside
    # other sections (or at the top level).
    for tok in stack[2:]:
        if tok.tagName != "section":
            estr = "Headings must occur at the top level."
            errors.append(StructuringError(estr, heading_token.startline))
            break
    if (heading_token.level+2) > len(stack):
        estr = "Wrong underline character for heading."
        errors.append(StructuringError(estr, heading_token.startline))

    # Pop the appropriate number of sections, so that the new
    # section is nested at the correct level.
    stack[heading_token.level+2:] = []
    indent_stack[heading_token.level+2:] = []

    # Colorize the heading.
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.
    sec = doc.createElement("section")
    stack[-1].appendChild(sec)
    stack.append(sec)
    sec.appendChild(head)
    indent_stack.append(None)
366
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag.  When necessary, create the associated
    list.
    """
    # Determine what type of bullet it is.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = 0
    if stack[-1].tagName != list_type:
        newlist = 1
    elif list_type == 'olist' and stack[-1].tagName == 'olist':
        # A non-consecutive item number starts a new ordered list.
        old_listitem = stack[-1].childNodes[-1]
        old_bullet = old_listitem.getAttribute("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = 1

    # Create the new list.
    if newlist:
        # Bug fix: the original tested "stack[-1].tagName is 'fieldlist'",
        # a string *identity* comparison whose outcome depends on string
        # interning; use equality instead.
        if stack[-1].tagName == 'fieldlist':
            # The new list item is not a field list item (since this
            # is a new list); but it's indented the same as the field
            # list.  This either means that they forgot to indent the
            # list, or they are trying to put something after the
            # field list.  The first one seems more likely, so we'll
            # just warn about that (to avoid confusion).
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tagName in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # Ignore this error if there's text on the same line as
            # the comment-opening quote -- epydoc can't reliably
            # determine the indentation for that line.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Fieldlist should be at the top-level.
            for tok in stack[2:]:
                if tok.tagName != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = doc.createElement(list_type)
        stack[-1].appendChild(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            start = bullet_token.contents.split('.')[:-1]
            # Bug fix: "start" is a *list* of number strings; the
            # original compared it to the string '1', which can never
            # be equal, so every ordered list got an explicit "start"
            # attribute.  Compare the last number instead.
            if start[-1] != '1':
                lst.setAttribute("start", start[-1])

    # Fields are treated somewhat specially: A "fieldlist"
    # node is created to make the parsing simpler, but fields
    # are adjoined directly into the "epytext" node, not into
    # the "fieldlist" node.
    if list_type == 'fieldlist':
        li = doc.createElement("field")
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = doc.createElement("tag")
        tag_elt.appendChild(doc.createTextNode(token_words[0]))
        li.appendChild(tag_elt)

        if len(token_words) > 1:
            arg_elt = doc.createElement("arg")
            arg_elt.appendChild(doc.createTextNode(token_words[1]))
            li.appendChild(arg_elt)
    else:
        li = doc.createElement("li")
        if list_type == 'olist':
            li.setAttribute("bullet", bullet_token.contents)

    # Add the bullet.
    stack[-1].appendChild(li)
    stack.append(li)
    indent_stack.append(None)
464 465 466 ################################################## 467 ## Tokenization 468 ################################################## 469
class Token:
    """
    C{Token}s are an intermediate data structure used while
    constructing the structuring DOM tree for a formatted docstring.
    There are five types of C{Token}:

      - Paragraphs
      - Literal blocks
      - Doctest blocks
      - Headings
      - Bullets

    The text contained in each C{Token} is stored in the C{contents}
    variable.  The string in this variable has been normalized: for
    paragraphs, it has been converted into a single line of text, with
    newline/indentation replaced by single spaces; for literal blocks
    and doctest blocks, the appropriate amount of leading whitespace
    has been removed from each line.

    Each C{Token} has an indentation level associated with it, stored
    in the C{indent} variable.  This indentation level is used by the
    structuring procedure to assemble hierarchical blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type.  Possible values are C{Token.PARA}
        (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK}
        (doctest block), C{Token.HEADING}, and C{Token.BULLET}.
    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins.  This
        line number is only used for issuing errors.
    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.
    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in
        number of leading spaces).  A value of C{None} indicates an
        unknown indentation; this is used for list items and fields
        that begin with one-line paragraphs.
    @type level: C{int} or C{None}
    @ivar level: The heading-level of this C{Token} if it is a
        heading; C{None}, otherwise.  Valid heading levels are 0, 1,
        and 2.
    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s.  This C{tag}
        value is also used for field tag C{Token}s, since fields
        function syntactically the same as list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces).  A value of C{None} indicates an
            unknown indentation.
        @type indent: C{int} or C{None}
        @param level: The heading-level of this C{Token} if it is a
            heading; C{None}, otherwise.
        @type level: C{int} or C{None}
        """
        self.tag = tag
        self.startline = startline
        self.contents = contents
        self.indent = indent
        self.level = level

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}, e.g.::
            <Token: para at line 12>
        """
        return '<Token: %s at line %s>' % (self.tag, self.startline)

    def to_dom(self, doc):
        """
        @param doc: The DOM document, used as an element factory.
        @return: a DOM representation of this C{Token}: an element
            named after this token's C{tag}, containing a single text
            node holding this token's C{contents}.
        @rtype: L{xml.dom.minidom.Element}
        """
        elt = doc.createElement(self.tag)
        elt.appendChild(doc.createTextNode(self.contents))
        return elt
578 579 # Construct regular expressions for recognizing bullets. These are 580 # global so they don't have to be reconstructed each time we tokenize 581 # a docstring. 582 _ULIST_BULLET = '[-]( +|$)' 583 _OLIST_BULLET = '(\d+[.])+( +|$)' 584 _FIELD_BULLET = '@\w+( [^{}:\n]+)?:( +|$)' 585 _BULLET_RE = re.compile(_ULIST_BULLET + '|' + 586 _OLIST_BULLET + '|' + 587 _FIELD_BULLET) 588 _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET) 589 _FIELD_BULLET_RE = re.compile(_FIELD_BULLET) 590 del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET 591
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the doctest block.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @return: The line number of the first line following the doctest
        block.
    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # If the block dedents past block_indent, track the minimum
    # indentation seen; it determines how much leading whitespace is
    # stripped from every line of the block.
    min_indent = block_indent

    lineno = start + 1
    while lineno < len(lines):
        # Find the indentation of this line.
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        # A blank line ends the doctest block.
        if indent == len(line):
            break

        # Dedenting past block_indent is an error; tolerate it by
        # widening min_indent so no text is lost.
        if indent < block_indent:
            min_indent = min(min_indent, indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, lineno))

        lineno += 1

    # Build the token from the block's lines (common leading
    # indentation removed) and return the index of the next line.
    contents = '\n'.join([ln[min_indent:] for ln in lines[start:lineno]])
    tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
    return lineno
646
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the literal block.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the literal
        block.
    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    lineno = start + 1
    while lineno < len(lines):
        # Find the indentation of this line.
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        # A dedent to (or past) block_indent ends the literal block;
        # blank lines are ignored, since they carry no indentation.
        if len(line) != indent and indent <= block_indent:
            break

        lineno += 1

    # Strip one extra column of indentation (the block is indented
    # relative to the paragraph introducing it), trim leading and
    # trailing blank lines, and build the token.
    contents = '\n'.join([ln[block_indent+1:] for ln in lines[start:lineno]])
    contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
    tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
    return lineno
693
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}.  C{bullet_indent} should be the indentation of the
    list item.  Any errors generated while tokenizing will be
    appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}.  This is
        the indentation of the list item.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.
    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    lineno = start + 1
    para_indent = None
    doublecolon = lines[start].rstrip()[-2:] == '::'

    # Find where the bullet ends and the paragraph text begins.
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while lineno < len(lines):
        # Find the indentation of this line.
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        # A "::" at the end of the previous line ends the paragraph
        # (a literal block follows).
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # A blank line ends the token.
        if indent == len(line): break

        # Dedenting past bullet_indent ends the list item.
        if indent < bullet_indent: break

        # A line beginning with a bullet ends the token.
        if _BULLET_RE.match(line, indent): break

        # The second line fixes the paragraph's indentation; any
        # later change in indentation ends the token.
        if para_indent is None: para_indent = indent
        if indent != para_indent: break

        lineno += 1

    # Add the bullet token.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent))

    # Add the paragraph token, if the paragraph is nonempty.
    pcontents = ([lines[start][para_start:].strip()] +
                 [ln.strip() for ln in lines[start+1:lineno]])
    pcontents = ' '.join(pcontents).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent))

    # Return the line number after the paragraph token ends.
    return lineno
768
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} containing the paragraph starting at
    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    should be the indentation of the paragraph.  Any errors
    generated while tokenizing the paragraph will be appended to
    C{errors}.  If the "paragraph" turns out to be a heading (a line
    of text underlined with a row of heading characters), a heading
    token is appended instead.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}.  This is
        the indentation of the paragraph.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the
        paragraph.
    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    lineno = start + 1
    doublecolon = 0
    while lineno < len(lines):
        # Find the indentation of this line.
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        # A "::" marker ends the paragraph (a literal block follows).
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # Blank lines, indentation changes, and list bullets all end
        # the paragraph.
        if indent == len(line): break
        if indent != para_indent: break
        if _BULLET_RE.match(line, indent): break

        # Check for mal-formatted field items.
        if line[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, lineno, is_fatal=0))

        lineno += 1

    contents = [ln.strip() for ln in lines[start:lineno]]

    # Does this token look like a heading?  (A line of text over a
    # run of identical underline characters of similar length.)
    if ((len(contents) < 2) or
        (contents[1][0] not in _HEADING_CHARS) or
        (abs(len(contents[0])-len(contents[1])) > 5)):
        looks_like_heading = 0
    else:
        looks_like_heading = 1
        for char in contents[1]:
            if char != contents[1][0]:
                looks_like_heading = 0
                break

    if looks_like_heading:
        if len(contents[0]) != len(contents[1]):
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            return start+2

    # Add the paragraph token, and return the line number after it.
    contents = ' '.join(contents)
    tokens.append(Token(Token.PARA, start, contents, para_indent))
    return lineno
853
def _tokenize(str, errors):
    """
    Split a given formatted docstring into an ordered list of
    C{Token}s, according to the epytext markup rules.

    @param str: The epytext string
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @type errors: C{list} of L{ParseError}
    @return: a list of the C{Token}s that make up the given string.
    @rtype: C{list} of L{Token}
    """
    tokens = []
    lines = str.split('\n')

    # Scan through the lines, determining what type of token we're
    # dealing with, and tokenizing it, as appropriate.
    lineno = 0
    while lineno < len(lines):
        # Get the current line and its indentation.
        line = lines[lineno]
        indent = len(line)-len(line.lstrip())

        if indent == len(line):
            # Skip blank lines.
            lineno += 1
            continue
        elif line[indent:indent+4] == '>>> ':
            # Lines beginning with ">>> " start doctest blocks.
            lineno = _tokenize_doctest(lines, lineno, indent,
                                       tokens, errors)
        elif _BULLET_RE.match(line, indent):
            # Lines beginning with a bullet start list items.
            lineno = _tokenize_listart(lines, lineno, indent,
                                       tokens, errors)
            if tokens[-1].indent is not None:
                indent = tokens[-1].indent
        else:
            # Check for mal-formatted field items.
            if line[indent] == '@':
                estr = "Possible mal-formatted field item."
                errors.append(TokenizationError(estr, lineno, is_fatal=0))

            # Anything else is either a paragraph or a heading.
            lineno = _tokenize_para(lines, lineno, indent, tokens, errors)

        # A paragraph token ending in '::' introduces a literal block.
        if (tokens[-1].tag == Token.PARA and
            tokens[-1].contents[-2:] == '::'):
            tokens[-1].contents = tokens[-1].contents[:-1]
            lineno = _tokenize_literal(lines, lineno, indent, tokens, errors)

    return tokens

##################################################
## Inline markup ("colorizing")
##################################################

# Assorted regular expressions used for colorizing.

# Matches a single open or close curly brace -- the delimiters that
# _colorize scans for.
_BRACE_RE = re.compile(r'{|}')

# Splits the contents of a link-type region ("name <target>") into an
# optional display name (group 1) and the target (group 2).  Raw
# string fixes the invalid '\s' escape in the original, which raises a
# DeprecationWarning/SyntaxWarning on modern Python.
_TARGET_RE = re.compile(r'^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')

def _colorize(doc, token, errors, tagName='para'):
    """
    Given a string containing the contents of a paragraph, produce a
    DOM C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    DOM C{Text}s.

    @param doc: The DOM C{Document} used to create the new nodes.
    @param token: The C{Token} whose contents should be colorized.
    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of C{string}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @returntype: C{Element}
    """
    str = token.contents
    linenum = 0  # (currently unused)

    # Maintain a stack of DOM elements, containing the ancestors of
    # the text currently being analyzed.  New elements are pushed when
    # "{" is encountered, and old elements are popped when "}" is
    # encountered.
    stack = [doc.createElement(tagName)]

    # This is just used to make error-reporting friendlier.  It's a
    # stack parallel to "stack" containing the index of each element's
    # open brace.
    openbrace_stack = [0]

    # Process the string, scanning for '{' and '}'s.  start is the
    # index of the first unprocessed character.  Each time through the
    # loop, we process the text from the first unprocessed character
    # to the next open or close brace.
    start = 0
    while 1:
        match = _BRACE_RE.search(str, start)
        if match == None: break
        end = match.start()

        # Open braces start new colorizing elements.  When preceeded
        # by a capital letter, they specify a colored region, as
        # defined by the _COLORIZING_TAGS dictionary.  Otherwise,
        # use a special "literal braces" element (with tag "litbrace"),
        # and convert them to literal braces once we find the matching
        # close-brace.
        if match.group() == '{':
            if (end>0) and 'A' <= str[end-1] <= 'Z':
                # "X{...": the capital letter is part of the markup,
                # so exclude it from the preceding text node.
                if (end-1) > start:
                    stack[-1].appendChild(doc.createTextNode(str[start:end-1]))
                if not _COLORIZING_TAGS.has_key(str[end-1]):
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(doc.createElement('unknown'))
                else:
                    tag = _COLORIZING_TAGS[str[end-1]]
                    stack.append(doc.createElement(tag))
            else:
                # A bare "{" opens a literal-brace region.
                if end > start:
                    stack[-1].appendChild(doc.createTextNode(str[start:end]))
                stack.append(doc.createElement('litbrace'))
            openbrace_stack.append(end)
            stack[-2].appendChild(stack[-1])

        # Close braces end colorizing elements.
        elif match.group() == '}':
            # Check for (and ignore) unbalanced braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            # Add any remaining text.
            if end > start:
                stack[-1].appendChild(doc.createTextNode(str[start:end]))

            # Special handling for symbols: an S{...} region must hold
            # exactly one text child naming a known symbol.
            if stack[-1].tagName == 'symbol':
                if (len(stack[-1].childNodes) != 1 or
                    not isinstance(stack[-1].childNodes[0], Text)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = stack[-1].childNodes[0].data
                    if _SYMBOLS.has_key(symb):
                        # It's a symbol
                        symbol = doc.createElement('symbol')
                        stack[-2].removeChild(stack[-1])
                        stack[-2].appendChild(symbol)
                        symbol.appendChild(doc.createTextNode(symb))
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for escape elements: E{...} is either a
            # named escape or a single-character escape.
            if stack[-1].tagName == 'escape':
                if (len(stack[-1].childNodes) != 1 or
                    not isinstance(stack[-1].childNodes[0], Text)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = stack[-1].childNodes[0].data
                    if _ESCAPES.has_key(escp):
                        # It's an escape from _ESCAPES
                        stack[-2].removeChild(stack[-1])
                        escp = _ESCAPES[escp]
                        stack[-2].appendChild(doc.createTextNode(escp))
                    elif len(escp) == 1:
                        # It's a single-character escape (eg E{.})
                        stack[-2].removeChild(stack[-1])
                        stack[-2].appendChild(doc.createTextNode(escp))
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for literal braces elements: replace
            # the temporary element with "{", its children, and "}".
            if stack[-1].tagName == 'litbrace':
                variables = stack[-1].childNodes
                stack[-2].removeChild(stack[-1])
                stack[-2].appendChild(doc.createTextNode('{'))
                for child in variables:
                    stack[-2].appendChild(child)
                stack[-2].appendChild(doc.createTextNode('}'))

            # Special handling for graphs:
            if stack[-1].tagName == 'graph':
                _colorize_graph(doc, stack[-1], token, end, errors)

            # Special handling for link-type elements:
            if stack[-1].tagName in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, stack[-1], token, end, errors)

            # Pop the completed element.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any final text.
    if start < len(str):
        stack[-1].appendChild(doc.createTextNode(str[start:]))

    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]

GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']

def _colorize_graph(doc, graph, token, end, errors):
    """
    Parse the body of a C{G{...}} region into a graph element whose
    first text child is the (lower-cased) graph type and whose
    remaining text children are its arguments.  Eg::
        G{classtree}
        G{classtree x, y, z}
        G{importgraph}
    """
    problem = False

    # Detach the raw contents; they get replaced by parsed pieces.
    old_children = graph.childNodes[:]
    for node in old_children:
        graph.removeChild(node)

    if len(old_children) == 1 and isinstance(old_children[0], Text):
        pieces = old_children[0].data.split(None, 1)
        graphtype = pieces[0].replace(':', '').strip().lower()
        if graphtype not in GRAPH_TYPES:
            problem = ("Bad graph type %s -- use one of %s" %
                       (pieces[0], ', '.join(GRAPH_TYPES)))
        elif len(pieces) == 1:
            args = []
        elif re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]):
            # Arguments may be separated by commas and/or whitespace.
            args = pieces[1].replace(',', ' ').replace(':', '').split()
        else:
            problem = "Bad graph arg list"
    else:
        problem = "Bad graph specification"

    if problem:
        errors.append(ColorizingError(problem, token, end))
        # Leave behind a well-formed placeholder element.
        graph.appendChild(doc.createTextNode('none'))
        graph.appendChild(doc.createTextNode(''))
        return

    graph.appendChild(doc.createTextNode(graphtype))
    for arg in args:
        graph.appendChild(doc.createTextNode(arg))
1110 1162 1163 ################################################## 1164 ## Formatters 1165 ################################################## 1166
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    @param tree: A DOM document encoding of an epytext string.
    @type tree: L{xml.dom.minidom.Document}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, Document):
        return to_epytext(tree.childNodes[0], indent, seclevel)
    if isinstance(tree, Text):
        # Braces in text are temporarily encoded as the control
        # characters \0 and \1 so they can be escaped as E{lb}/E{rb}
        # (or restored literally) by the enclosing block below.
        str = re.sub(r'\{', '\0', tree.data)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tagName == 'epytext': indent -= 2
    if tree.tagName == 'section': seclevel += 1
    # Render all children first; most branches below only wrap them.
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.childNodes]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back)
    # (\2 is the control character used below to mark literal blocks.)
    childstr = re.sub(':(\s*)\2', '::\\1', childstr)

    if tree.tagName == 'para':
        str = wordwrap(childstr, indent)+'\n'
        # Escape characters that would re-tokenize as markup at the
        # start of a line (ordered-list numbers, bullets, fields).
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        return str
    elif tree.tagName == 'li':
        # Use the recorded bullet if one was stored on the element.
        bulletAttr = tree.getAttributeNode('bullet')
        if bulletAttr: bullet = bulletAttr.value
        else: bullet = '-'
        return indent*' '+ bullet + ' ' + childstr.lstrip()
    elif tree.tagName == 'heading':
        str = re.sub('\0', 'E{lb}',childstr)
        str = re.sub('\1', 'E{rb}', str)
        # Underline with the character for this section depth.
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
    elif tree.tagName == 'doctestblock':
        # Restore literal braces; doctest text is not escaped.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['  '+indent*' '+line for line in str.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tagName == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [(indent+1)*' '+line for line in str.split('\n')]
        # Prefix with \2 so the parent para can restore its "::".
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tagName == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tagName == 'target':
        return '<%s>' % childstr
    elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext',
                          'section', 'olist', 'ulist', 'name'):
        # Pure containers: pass the rendered children through.
        return childstr
    elif tree.tagName == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tagName == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline colorizing regions map back to their one-letter tags.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tagName:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tagName)
1257
def to_plaintext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext to a string representation.
    This representation is similar to the string generated by
    C{to_epytext}, but C{to_plaintext} removes inline markup, prints
    escaped characters in unescaped form, etc.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: L{xml.dom.minidom.Document}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, Document):
        return to_plaintext(tree.childNodes[0], indent, seclevel)
    if isinstance(tree, Text): return tree.data

    if tree.tagName == 'section': seclevel += 1

    # Figure out the child indent level.
    if tree.tagName == 'epytext': cindent = indent
    elif tree.tagName == 'li' and tree.getAttributeNode('bullet'):
        # Indent list bodies past the bullet and its trailing space.
        cindent = indent + 1 + len(tree.getAttributeNode('bullet').value)
    else:
        cindent = indent + 2
    variables = [to_plaintext(c, cindent, seclevel) for c in tree.childNodes]
    childstr = ''.join(variables)

    if tree.tagName == 'para':
        return wordwrap(childstr, indent)+'\n'
    elif tree.tagName == 'li':
        # We should be able to use getAttribute here; but there's no
        # convenient way to test if an element has an attribute..
        bulletAttr = tree.getAttributeNode('bullet')
        if bulletAttr: bullet = bulletAttr.value
        else: bullet = '-'
        return indent*' ' + bullet + ' ' + childstr.lstrip()
    elif tree.tagName == 'heading':
        # Underline headings with the character for this depth.
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ((indent-2)*' ' + childstr + '\n' +
                (indent-2)*' ' + uline + '\n')
    elif tree.tagName == 'doctestblock':
        lines = [(indent+2)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tagName == 'literalblock':
        lines = [(indent+1)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tagName == 'fieldlist':
        return childstr
    elif tree.tagName == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tagName == 'uri':
        # A uri element has exactly two children: name and target.
        if len(variables) != 2: raise ValueError('Bad URI ')
        elif variables[0] == variables[1]: return '<%s>' % variables[1]
        else: return '%r<%s>' % (variables[0], variables[1])
    elif tree.tagName == 'link':
        # Only the link's name is shown in plaintext.
        if len(variables) != 2: raise ValueError('Bad Link')
        return '%s' % variables[0]
    elif tree.tagName in ('olist', 'ulist'):
        # [xx] always use condensed lists.
        ## Use a condensed list if each list item is 1 line long.
        #for child in variables:
        #    if child.count('\n') > 2: return childstr
        return childstr.replace('\n\n', '\n')+'\n'
    elif tree.tagName == 'symbol':
        return '%s' % childstr
    elif tree.tagName == 'graph':
        return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
    else:
        # Assume that anything else can be passed through.
        return childstr
1342
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: L{xml.dom.minidom.Document}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    if isinstance(tree, Document):
        return to_debug(tree.childNodes[0], indent, seclevel)
    if isinstance(tree, Text):
        # Braces are temporarily encoded as \0/\1 (see to_epytext).
        str = re.sub(r'\{', '\0', tree.data)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tagName == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.childNodes]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back)
    childstr = re.sub(':( *\n     \|\n)\2', '::\\1', childstr)

    # Each block type is labelled in a six-character left margin
    # (e.g. "   P>|", " LIT>|"); continuation lines use "     |".
    if tree.tagName == 'para':
        str = wordwrap(childstr, indent-6, 69)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        lines = str.rstrip().split('\n')
        lines[0] = '   P>|' + lines[0]
        lines[1:] = ['     |'+l for l in lines[1:]]
        return '\n'.join(lines)+'\n     |\n'
    elif tree.tagName == 'li':
        bulletAttr = tree.getAttributeNode('bullet')
        if bulletAttr: bullet = bulletAttr.value
        else: bullet = '-'
        return '  LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
    elif tree.tagName in ('olist', 'ulist'):
        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
    elif tree.tagName == 'heading':
        str = re.sub('\0', 'E{lb}', childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
                '     |'+(indent-8)*' ' + uline + '\n')
    elif tree.tagName == 'doctestblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-4)*' '+line for line in str.split('\n')]
        # Replace the first line's plain margin with the block label.
        lines[0] = 'DTST>'+lines[0][5:]
        return '\n'.join(lines) + '\n     |\n'
    elif tree.tagName == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-5)*' '+line for line in str.split('\n')]
        lines[0] = ' LIT>'+lines[0][5:]
        # The leading \2 lets the parent para restore its "::".
        return '\2' + '\n'.join(lines) + '\n     |\n'
    elif tree.tagName == 'field':
        # Children are: tag, zero or more args, then the body.
        numargs = 0
        while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tagName == 'target':
        return '<%s>' % childstr
    elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext',
                          'section', 'olist', 'ulist', 'name'):
        # Pure containers (olist/ulist are already handled above).
        return childstr
    elif tree.tagName == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tagName == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tagName:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tagName)
1436 1437 ################################################## 1438 ## Top-Level Wrapper function 1439 ##################################################
def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: L{xml.dom.minidom.Document}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    errors = []
    # Bug fix: initialize these before the try block.  If parse()
    # raises, the old code left `warnings` (and `val`) unbound, so the
    # code below died with a NameError instead of re-raising the
    # original exception.
    warnings = []
    val = None
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except:
        # Intentionally broad: the exception is re-raised below, after
        # any collected warnings/errors have been displayed.
        confused = 1

    if not show_warnings: warnings = []
    warnings.sort()
    errors.sort()
    if warnings:
        print >>stream, '='*SCRWIDTH
        print >>stream, "WARNINGS"
        print >>stream, '-'*SCRWIDTH
        for warning in warnings:
            print >>stream, warning.as_warning()
        print >>stream, '='*SCRWIDTH
    if errors and show_errors:
        if not warnings: print >>stream, '='*SCRWIDTH
        print >>stream, "ERRORS"
        print >>stream, '-'*SCRWIDTH
        for error in errors:
            print >>stream, error
        print >>stream, '='*SCRWIDTH

    if confused: raise
    elif errors: raise SyntaxError('Encountered Errors')
    else: return val
1491 1492 ################################################## 1493 ## Parse Errors 1494 ################################################## 1495
class TokenizationError(ParseError):
    """
    An error generated while tokenizing a formatted documentation
    string.  (Appended to the error list by C{_tokenize} and the
    C{_tokenize_*} helpers; may be non-fatal, via C{is_fatal=0}.)
    """
1501
class StructuringError(ParseError):
    """
    An error generated while structuring a formatted documentation
    string.  (The structuring phase itself is defined elsewhere in
    this module.)
    """
1507
class ColorizingError(ParseError):
    """
    An error generated while colorizing a paragraph.
    """
    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occured
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occured.
        @type charnum: C{int}
        """
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum

    CONTEXT_RANGE = 20
    def descr(self):
        """
        Return the error description, followed by the offending text
        with a caret marking the error position; long context is
        elided with '...' on either side.
        """
        ctx = self.CONTEXT_RANGE
        text = self.token.contents
        pos = self.charnum

        # Context to the left of the error position.
        if pos <= ctx:
            left = text[:pos]
        else:
            left = '...' + text[pos-ctx:pos]

        # Context to the right of the error position.
        if len(text) - pos <= ctx:
            right = text[pos:]
        else:
            right = text[pos:pos+ctx] + '...'

        return ('%s\n\n%s%s\n%s^' % (self._descr, left, right,
                                     ' '*len(left)))
1541 1542 ################################################## 1543 ## Convenience parsers 1544 ################################################## 1545
def parse_as_literal(str):
    """
    Return a DOM document matching the epytext DTD, containing a
    single literal block.  That literal block will include the
    contents of the given string.  This method is typically used as a
    fall-back when the parser fails.

    @param str: The string which should be enclosed in a literal
        block.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single literal
        block.
    @rtype: L{xml.dom.minidom.Document}
    """
    doc = Document()
    root = doc.createElement('epytext')
    block = doc.createElement('literalblock')
    block.appendChild(doc.createTextNode(str))
    root.appendChild(block)
    doc.appendChild(root)
    return doc
1568
def parse_as_para(str):
    """
    Return a DOM document matching the epytext DTD, containing a
    single paragraph.  That paragraph will include the contents of the
    given string.  This can be used to wrap some forms of
    automatically generated information (such as type names) in
    paragraphs.

    @param str: The string which should be enclosed in a paragraph.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single paragraph.
    @rtype: L{xml.dom.minidom.Document}
    """
    doc = Document()
    root = doc.createElement('epytext')
    para = doc.createElement('para')
    para.appendChild(doc.createTextNode(str))
    root.appendChild(para)
    doc.appendChild(root)
    return doc
1590 1591 ################################################################# 1592 ## SUPPORT FOR EPYDOC 1593 ################################################################# 1594 from epydoc.docwriter.dotgraph import * 1595
def parse_docstring(docstring, errors, **options):
    """
    Parse the given docstring, which is formatted using epytext; and
    return a C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    tree = parse(docstring, errors)
    return ParsedEpytextDocstring(tree)
1610
1611 -class ParsedEpytextDocstring(ParsedDocstring):
1612 SYMBOL_TO_HTML = { 1613 # Symbols 1614 '<-': 'larr', '->': 'rarr', '^': 'uarr', 'v': 'darr', 1615 1616 # Greek letters 1617 'alpha': 'alpha', 'beta': 'beta', 'gamma': 'gamma', 1618 'delta': 'delta', 'epsilon': 'epsilon', 'zeta': 'zeta', 1619 'eta': 'eta', 'theta': 'theta', 'iota': 'iota', 1620 'kappa': 'kappa', 'lambda': 'lambda', 'mu': 'mu', 1621 'nu': 'nu', 'xi': 'xi', 'omicron': 'omicron', 1622 'pi': 'pi', 'rho': 'rho', 'sigma': 'sigma', 1623 'tau': 'tau', 'upsilon': 'upsilon', 'phi': 'phi', 1624 'chi': 'chi', 'psi': 'psi', 'omega': 'omega', 1625 'Alpha': 'Alpha', 'Beta': 'Beta', 'Gamma': 'Gamma', 1626 'Delta': 'Delta', 'Epsilon': 'Epsilon', 'Zeta': 'Zeta', 1627 'Eta': 'Eta', 'Theta': 'Theta', 'Iota': 'Iota', 1628 'Kappa': 'Kappa', 'Lambda': 'Lambda', 'Mu': 'Mu', 1629 'Nu': 'Nu', 'Xi': 'Xi', 'Omicron': 'Omicron', 1630 'Pi': 'Pi', 'Rho': 'Rho', 'Sigma': 'Sigma', 1631 'Tau': 'Tau', 'Upsilon': 'Upsilon', 'Phi': 'Phi', 1632 'Chi': 'Chi', 'Psi': 'Psi', 'Omega': 'Omega', 1633 1634 # HTML character entities 1635 'larr': 'larr', 'rarr': 'rarr', 'uarr': 'uarr', 1636 'darr': 'darr', 'harr': 'harr', 'crarr': 'crarr', 1637 'lArr': 'lArr', 'rArr': 'rArr', 'uArr': 'uArr', 1638 'dArr': 'dArr', 'hArr': 'hArr', 1639 'copy': 'copy', 'times': 'times', 'forall': 'forall', 1640 'exist': 'exist', 'part': 'part', 1641 'empty': 'empty', 'isin': 'isin', 'notin': 'notin', 1642 'ni': 'ni', 'prod': 'prod', 'sum': 'sum', 1643 'prop': 'prop', 'infin': 'infin', 'ang': 'ang', 1644 'and': 'and', 'or': 'or', 'cap': 'cap', 'cup': 'cup', 1645 'int': 'int', 'there4': 'there4', 'sim': 'sim', 1646 'cong': 'cong', 'asymp': 'asymp', 'ne': 'ne', 1647 'equiv': 'equiv', 'le': 'le', 'ge': 'ge', 1648 'sub': 'sub', 'sup': 'sup', 'nsub': 'nsub', 1649 'sube': 'sube', 'supe': 'supe', 'oplus': 'oplus', 1650 'otimes': 'otimes', 'perp': 'perp', 1651 1652 # Alternate (long) names 1653 'infinity': 'infin', 'integral': 'int', 'product': 'prod', 1654 '<=': 'le', '>=': 'ge', 1655 } 1656 1657 SYMBOL_TO_LATEX = { 
1658 # Symbols 1659 '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)', 1660 '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)', 1661 1662 # Greek letters (use lower case when upcase not available) 1663 1664 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma': 1665 r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon': 1666 r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)', 1667 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa': 1668 r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)', 1669 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi': 1670 r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau': 1671 r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)', 1672 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega': 1673 r'\(\omega\)', 1674 1675 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma': 1676 r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon': 1677 r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)', 1678 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa': 1679 r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)', 1680 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi': 1681 r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau': 1682 r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)', 1683 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega': 1684 r'\(\Omega\)', 1685 1686 # HTML character entities 1687 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr': 1688 r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr': 1689 r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)', 1690 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr': 1691 r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr': 1692 r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}', 1693 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist': 1694 r'\(\exists\)', 'part': r'\(\partial\)', 'empty': 1695 r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)', 1696 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 
'sum': r'\(\sum\)', 1697 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang': 1698 r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap': 1699 r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4': 1700 r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)', 1701 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv': 1702 r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub': 1703 r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)', 1704 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus': 1705 r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)', 1706 1707 # Alternate (long) names 1708 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product': 1709 r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)', 1710 } 1711
1712 - def __init__(self, dom_tree):
1713 if isinstance(dom_tree, Document): 1714 dom_tree = dom_tree.childNodes[0] 1715 self._tree = dom_tree 1716 # Caching: 1717 self._html = self._latex = self._plaintext = None 1718 self._terms = None
1719
1720 - def to_html(self, docstring_linker, directory=None, docindex=None, 1721 context=None, **options):
1722 if self._html is not None: return self._html 1723 if self._tree is None: return '' 1724 indent = options.get('indent', 0) 1725 self._html = self._to_html(self._tree, docstring_linker, directory, 1726 docindex, context, indent) 1727 return self._html
1728
1729 - def to_latex(self, docstring_linker, **options):
1730 if self._latex is not None: return self._latex 1731 if self._tree is None: return '' 1732 indent = options.get('indent', 0) 1733 self._hyperref = options.get('hyperref', 1) 1734 self._latex = self._to_latex(self._tree, docstring_linker, indent) 1735 return self._latex
1736
1737 - def to_plaintext(self, docstring_linker, **options):
1738 # [XX] don't cache -- different options might be used!! 1739 #if self._plaintext is not None: return self._plaintext 1740 if self._tree is None: return '' 1741 if 'indent' in options: 1742 self._plaintext = to_plaintext(self._tree, 1743 indent=options['indent']) 1744 else: 1745 self._plaintext = to_plaintext(self._tree) 1746 return self._plaintext
1747
1748 - def _index_term_key(self, tree):
1749 str = to_plaintext(tree) 1750 str = re.sub(r'\s\s+', '-', str) 1751 return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1752
    def _to_html(self, tree, linker, directory, docindex, context,
                 indent=0, seclevel=0):
        """
        Recursively render C{tree} (a DOM element or Text node) as an
        HTML string.  C{linker} resolves cross-references; C{directory}
        is where generated graph images are written.
        """
        if isinstance(tree, Text):
            return plaintext_to_html(tree.data)

        if tree.tagName == 'epytext': indent -= 2
        if tree.tagName == 'section': seclevel += 1

        # Process the variables first.
        variables = [self._to_html(c, linker, directory, docindex, context,
                                   indent+2, seclevel)
                     for c in tree.childNodes]

        # Get rid of unnecessary <P>...</P> tags; they introduce extra
        # space on most browsers that we don't want.
        # (The slice [5+indent:-5] strips the wordwrapped indent plus
        # the "<p>" prefix and "</p>\n" suffix added below.)
        for i in range(len(variables)-1):
            if (not isinstance(tree.childNodes[i], Text) and
                tree.childNodes[i].tagName == 'para' and
                (isinstance(tree.childNodes[i+1], Text) or
                 tree.childNodes[i+1].tagName != 'para')):
                variables[i] = ' '*(indent+2)+variables[i][5+indent:-5]+'\n'
        if (tree.hasChildNodes() and
            not isinstance(tree.childNodes[-1], Text) and
            tree.childNodes[-1].tagName == 'para'):
            variables[-1] = ' '*(indent+2)+variables[-1][5+indent:-5]+'\n'

        # Construct the HTML string for the variables.
        childstr = ''.join(variables)

        # Perform the appropriate action for the DOM tree type.
        if tree.tagName == 'para':
            return wordwrap('<p>%s</p>' % childstr, indent)
        elif tree.tagName == 'code':
            return '<code>%s</code>' % childstr
        elif tree.tagName == 'uri':
            # Children are [name, target].
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tagName == 'link':
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tagName == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tagName == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tagName == 'indexed':
            # Wrap a copy of the term in an 'epytext' root so the
            # linker can render it as a standalone docstring.
            term = tree.cloneNode(1)
            term.tagName = 'epytext'
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
            #term_key = self._index_term_key(tree)
            #return linker.translate_indexterm(childstr, term_key)
        elif tree.tagName == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tagName == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tagName == 'olist':
            # Honor an explicit numbering start, if recorded.
            startAttr = tree.getAttributeNode('start')
            if startAttr: start = ' start="%s"' % startAttr.value
            else: start = ''
            return ('%s<ol%s>\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tagName == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tagName == 'heading':
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tagName == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tagName == 'doctestblock':
            dtb = colorize_doctestblock(childstr.strip())
            return '<pre class="doctestblock">\n%s</pre>\n' % dtb
        elif tree.tagName == 'fieldlist':
            # Field lists are split out before rendering reaches here.
            raise AssertionError("There should not be any field lists left")
        elif tree.tagName in ('epytext', 'section', 'tag', 'arg',
                              'name', 'target', 'html'):
            # Pure containers: pass the rendered children through.
            return childstr
        elif tree.tagName == 'symbol':
            symbol = tree.childNodes[0].data
            if self.SYMBOL_TO_HTML.has_key(symbol):
                return '&%s;' % self.SYMBOL_TO_HTML[symbol]
            else:
                return '[??]'
        elif tree.tagName == 'graph':
            # Generate the graph.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            # Write the graph.
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tagName)
1844 1845 #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph']
1846 - def _build_graph(self, graph_type, graph_args, linker, 1847 docindex, context):
1848 # Generate the graph 1849 if graph_type == 'classtree': 1850 if graph_args: 1851 bases = [docindex.find(name, context) 1852 for name in graph_args] 1853 elif isinstance(context, ClassDoc): 1854 bases = [context] 1855 else: 1856 log.warning("Could not construct class tree: you must " 1857 "specify one or more base classes.") 1858 return None 1859 return class_tree_graph(bases, linker, context) 1860 elif graph_type == 'packagetree': 1861 if graph_args: 1862 packages = [docindex.find(name, context) 1863 for name in graph_args] 1864 elif isinstance(context, ModuleDoc): 1865 packages = [context] 1866 else: 1867 log.warning("Could not construct package tree: you must " 1868 "specify one or more root packages.") 1869 return None 1870 return package_tree_graph(packages, linker, context) 1871 elif graph_type == 'importgraph': 1872 modules = [d for d in docindex.root if isinstance(d, ModuleDoc)] 1873 return import_graph(modules, docindex, linker, context) 1874 1875 elif graph_type == 'callgraph': 1876 if graph_args: 1877 docs = [docindex.find(name, context) for name in graph_args] 1878 docs = [doc for doc in docs if doc is not None] 1879 else: 1880 docs = [context] 1881 return call_graph(docs, docindex, linker, context) 1882 else: 1883 log.warning("Unknown graph type %s" % graph_type)
1884 1885
1886 - def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
1887 if isinstance(tree, Text): 1888 return plaintext_to_latex(tree.data, breakany=breakany) 1889 1890 if tree.tagName == 'section': seclevel += 1 1891 1892 # Figure out the child indent level. 1893 if tree.tagName == 'epytext': cindent = indent 1894 else: cindent = indent + 2 1895 variables = [self._to_latex(c, linker, cindent, seclevel, breakany) 1896 for c in tree.childNodes] 1897 childstr = ''.join(variables) 1898 1899 if tree.tagName == 'para': 1900 return wordwrap(childstr, indent)+'\n' 1901 elif tree.tagName == 'code': 1902 return '\\texttt{%s}' % childstr 1903 elif tree.tagName == 'uri': 1904 if len(variables) != 2: raise ValueError('Bad URI ') 1905 if self._hyperref: 1906 # ~ and # should not be escaped in the URI. 1907 uri = tree.childNodes[1].childNodes[0].data 1908 uri = uri.replace('{\\textasciitilde}', '~') 1909 uri = uri.replace('\\#', '#') 1910 if variables[0] == variables[1]: 1911 return '\\href{%s}{\\textit{%s}}' % (uri, variables[1]) 1912 else: 1913 return ('%s\\footnote{\\href{%s}{%s}}' % 1914 (variables[0], uri, variables[1])) 1915 else: 1916 if variables[0] == variables[1]: 1917 return '\\textit{%s}' % variables[1] 1918 else: 1919 return '%s\\footnote{%s}' % (variables[0], variables[1]) 1920 elif tree.tagName == 'link': 1921 if len(variables) != 2: raise ValueError('Bad Link') 1922 return linker.translate_identifier_xref(variables[1], variables[0]) 1923 elif tree.tagName == 'italic': 1924 return '\\textit{%s}' % childstr 1925 elif tree.tagName == 'math': 1926 return '\\textit{%s}' % childstr 1927 elif tree.tagName == 'indexed': 1928 term = tree.cloneNode(1) 1929 term.tagName = 'epytext' 1930 return linker.translate_indexterm(ParsedEpytextDocstring(term)) 1931 elif tree.tagName == 'bold': 1932 return '\\textbf{%s}' % childstr 1933 elif tree.tagName == 'li': 1934 return indent*' ' + '\\item ' + childstr.lstrip() 1935 elif tree.tagName == 'heading': 1936 return ' '*(indent-2) + '(section) %s\n\n' % childstr 1937 elif tree.tagName == 
'doctestblock': 1938 return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr 1939 elif tree.tagName == 'literalblock': 1940 return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr 1941 elif tree.tagName == 'fieldlist': 1942 return indent*' '+'{omitted fieldlist}\n' 1943 elif tree.tagName == 'olist': 1944 return (' '*indent + '\\begin{enumerate}\n\n' + 1945 ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' + 1946 childstr + 1947 ' '*indent + '\\end{enumerate}\n\n') 1948 elif tree.tagName == 'ulist': 1949 return (' '*indent + '\\begin{itemize}\n' + 1950 ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' + 1951 childstr + 1952 ' '*indent + '\\end{itemize}\n\n') 1953 elif tree.tagName == 'symbol': 1954 symbol = tree.childNodes[0].data 1955 if self.SYMBOL_TO_LATEX.has_key(symbol): 1956 return r'%s' % self.SYMBOL_TO_LATEX[symbol] 1957 else: 1958 return '[??]' 1959 elif tree.tagName == 'graph': 1960 return '(GRAPH)' 1961 #raise ValueError, 'graph not implemented yet for latex' 1962 else: 1963 # Assume that anything else can be passed through. 1964 return childstr
1965
1966 - def summary(self):
1967 if self._tree is None: return self 1968 1969 # Is the cloning that happens here safe/proper? (Cloning 1970 # between 2 different documents) 1971 tree = self._tree 1972 1973 doc = Document() 1974 epytext = doc.createElement('epytext') 1975 doc.appendChild(epytext) 1976 1977 # Find the first paragraph. 1978 variables = tree.childNodes 1979 while (len(variables) > 0) and (variables[0].tagName != 'para'): 1980 if variables[0].tagName in ('section', 'ulist', 'olist', 'li'): 1981 variables = variables[0].childNodes 1982 else: 1983 variables = variables[1:] 1984 1985 # Special case: if the docstring contains a single literal block, 1986 # then try extracting the summary from it. 1987 if (len(variables) == 0 and len(tree.childNodes) == 1 and 1988 tree.childNodes[0].tagName == 'literalblock'): 1989 str = re.split(r'\n\s*(\n|$).*', 1990 tree.childNodes[0].childNodes[0].data, 1)[0] 1991 variables = [doc.createElement('para')] 1992 variables[0].appendChild(doc.createTextNode(str)) 1993 1994 # If we didn't find a paragraph, return an empty epytext. 1995 if len(variables) == 0: return ParsedEpytextDocstring(doc) 1996 1997 # Extract the first sentence. 1998 parachildren = variables[0].childNodes 1999 para = doc.createElement('para') 2000 epytext.appendChild(para) 2001 for parachild in parachildren: 2002 if isinstance(parachild, Text): 2003 m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild.data) 2004 if m: 2005 para.appendChild(doc.createTextNode(m.group(1))) 2006 return ParsedEpytextDocstring(doc) 2007 para.appendChild(parachild.cloneNode(1)) 2008 2009 return ParsedEpytextDocstring(doc)
2010
2011 - def split_fields(self, errors=None):
2012 if self._tree is None: return (self, ()) 2013 tree = self._tree.cloneNode(1) # Hmm.. 2014 fields = [] 2015 2016 if (tree.hasChildNodes() and 2017 tree.childNodes[-1].tagName == 'fieldlist' and 2018 tree.childNodes[-1].hasChildNodes()): 2019 field_nodes = tree.childNodes[-1].childNodes 2020 tree.removeChild(tree.childNodes[-1]) 2021 2022 for field in field_nodes: 2023 # Get the tag 2024 tag = field.childNodes[0].childNodes[0].data.lower() 2025 field.removeChild(field.childNodes[0]) 2026 2027 # Get the argument. 2028 if field.childNodes and field.childNodes[0].tagName == 'arg': 2029 arg = field.childNodes[0].childNodes[0].data 2030 field.removeChild(field.childNodes[0]) 2031 else: 2032 arg = None 2033 2034 # Process the field. 2035 field.tagName = 'epytext' 2036 fields.append(Field(tag, arg, ParsedEpytextDocstring(field))) 2037 2038 # Save the remaining docstring as the description.. 2039 if tree.hasChildNodes() and tree.childNodes[0].hasChildNodes(): 2040 descr = tree 2041 else: 2042 descr = None 2043 2044 return ParsedEpytextDocstring(descr), fields
2045
2046 - def index_terms(self):
2047 if self._terms is None: 2048 self._terms = [] 2049 self._index_terms(self._tree, self._terms) 2050 return self._terms
2051
2052 - def _index_terms(self, tree, terms):
2053 if tree is None or isinstance(tree, Text): 2054 return 2055 2056 if tree.tagName == 'indexed': 2057 term = tree.cloneNode(1) 2058 term.tagName = 'epytext' 2059 terms.append(ParsedEpytextDocstring(term)) 2060 2061 # Look for index items in child nodes. 2062 for child in tree.childNodes: 2063 self._index_terms(child, terms)
2064