Package epydoc :: Package markup
[hide private]
[frames] | no frames]

Source Code for Package epydoc.markup

  1  # 
  2  # epydoc package file 
  3  # 
  4  # A python documentation Module 
  5  # Edward Loper 
  6  # 
  7  # $Id: __init__.py 1209 2006-04-10 13:23:24Z edloper $ 
  8  # 
  9   
 10  """ 
 11  Markup language support for docstrings.  Each submodule defines a 
 12  parser for a single markup language.  These parsers convert an 
 13  object's docstring to a L{ParsedDocstring}, a standard intermediate 
 14  representation that can be used to generate output. 
 15  C{ParsedDocstring}s support the following operations: 
 16    - output generation (L{to_plaintext()<ParsedDocstring.to_plaintext>}, 
 17      L{to_html()<ParsedDocstring.to_html>}, and 
 18      L{to_latex()<ParsedDocstring.to_latex>}). 
 19    - Summarization (L{summary()<ParsedDocstring.summary>}). 
 20    - Field extraction (L{split_fields()<ParsedDocstring.split_fields>}). 
 21    - Index term extraction (L{index_terms()<ParsedDocstring.index_terms>}). 
 22   
 23  The L{parse()} function provides a single interface to the 
 24  C{epydoc.markup} package: it takes a docstring and the name of a 
 25  markup language; delegates to the appropriate parser; and returns the 
 26  parsed docstring (along with any errors or warnings that were 
 27  generated). 
 28   
 29  The C{ParsedDocstring} output generation methods (C{to_M{format}()}) 
 30  use a L{DocstringLinker} to link the docstring output with the rest of 
 31  the documentation that epydoc generates.  C{DocstringLinker}s are 
 32  currently responsible for translating two kinds of crossreference: 
 33    - index terms (L{translate_indexterm() 
 34      <DocstringLinker.translate_indexterm>}). 
 35    - identifier crossreferences (L{translate_identifier_xref() 
 36      <DocstringLinker.translate_identifier_xref>}). 
 37   
 38  A parsed docstring's fields can be extracted using the 
 39  L{ParsedDocstring.split_fields()} method.  This method divides a 
 40  docstring into its main body and a list of L{Field}s, each of which 
 41  encodes a single field.  The field's bodies are encoded as 
 42  C{ParsedDocstring}s. 
 43   
 44  Markup errors are represented using L{ParseError}s.  These exception 
 45  classes record information about the cause, location, and severity of 
 46  each error. 
 47   
 48  @sort: parse, ParsedDocstring, Field, DocstringLinker 
 49  @group Errors and Warnings: ParseError 
 50  @group Utility Functions: parse_type_of 
 51  @var SCRWIDTH: The default width with which text will be wrapped 
 52        when formatting the output of the parser. 
 53  @type SCRWIDTH: C{int} 
 54  @var _parse_warnings: Used by L{_parse_warn}. 
 55  """ 
# Declares the markup used by this module's own docstrings (they are
# written in epytext).  NOTE(review): 'en' is presumably the language
# code of the docstrings -- confirm.
__docformat__ = 'epytext en'
 57   
 58  import re, types, sys 
 59  from epydoc import log 
 60  from epydoc.util import plaintext_to_html, plaintext_to_latex 
 61  import epydoc 
 62  from epydoc.compat import * 
 63   
 64  ################################################## 
 65  ## Contents 
 66  ################################################## 
 67  # 
 68  # 1. parse() dispatcher 
 69  # 2. ParsedDocstring abstract base class 
 70  # 3. Field class 
 71  # 4. Docstring Linker 
 72  # 5. ParseError exceptions 
 73  # 6. Misc helpers 
 74  # 
 75   
 76  ################################################## 
 77  ## Dispatcher 
 78  ################################################## 
 79   
# Maps each markup language name to the thing that parses it.  A value
# is either a string naming the module to import C{parse_docstring}
# from (parse() replaces it with the imported function on first use),
# or the parse function itself (as stored by
# register_markup_language()).
_markup_language_registry = {
    'restructuredtext': 'epydoc.markup.restructuredtext',
    'epytext': 'epydoc.markup.epytext',
    'plaintext': 'epydoc.markup.plaintext',
    'javadoc': 'epydoc.markup.javadoc',
    }
 86   
def register_markup_language(name, parse_function):
    """
    Add a markup language to the registry used by L{parse()}, or
    replace the parser of a language that is already registered.

    @param name: The name of the markup language.  C{name} should be a
        simple identifier, such as C{'epytext'} or
        C{'restructuredtext'}.  Names are case insensitive: the name
        is lowercased before it is stored.
    @param parse_function: A function that parses docstrings written
        in the markup language and returns a L{ParsedDocstring}.  It
        should have the following signature:

            >>> def parse(s, errors):
            ...     'returns a ParsedDocstring'

        Where:
            - C{s} is the string to parse.  (C{s} will be a unicode
              string.)
            - C{errors} is a list; any errors that are generated
              during docstring parsing should be appended to this
              list (as L{ParseError} objects).

        L{parse()} also forwards any extra keyword options that it
        is given to this function.
    """
    registry_key = name.lower()
    _markup_language_registry[registry_key] = parse_function

# The (lowercased) names of the markup languages that parse() has
# dispatched a docstring to so far.
MARKUP_LANGUAGES_USED = set()

def _parse_as_plaintext(docstring, errors, **options):
    """
    Fallback used by L{parse()}: parse C{docstring} with the plaintext
    markup parser.

    @return: Whatever C{epydoc.markup.plaintext.parse_docstring}
        returns for the given arguments.
    """
    # Imported on demand, exactly as the fallback paths of parse()
    # have always done.
    import epydoc.markup.plaintext as plaintext
    return plaintext.parse_docstring(docstring, errors, **options)


def parse(docstring, markup='plaintext', errors=None, **options):
    """
    Parse the given docstring, and use it to construct a
    C{ParsedDocstring}.  If any fatal C{ParseError}s are encountered
    while parsing the docstring, then the docstring will be rendered
    as plaintext, instead.

    @type docstring: C{string}
    @param docstring: The docstring to encode.
    @type markup: C{string}
    @param markup: The name of the markup language that is used by
        the docstring.  If the markup language is not supported, then
        the docstring will be treated as plaintext.  The markup name
        is case-insensitive.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra keyword options; they are passed through
        unchanged to the markup language's parse function.
    @rtype: L{ParsedDocstring}
    @return: A L{ParsedDocstring} that encodes the contents of
        C{docstring}.
    @raise ParseError: If C{errors} is C{None} and a fatal error is
        encountered while parsing.
    """
    # Remember whether the caller supplied an error list *before*
    # substituting a private one.
    raise_on_error = (errors is None)
    if errors is None:
        errors = []

    # Normalize the markup language name.
    markup = markup.lower()

    # Is the markup language valid?  (re.match only anchors at the
    # start, so this checks that the name begins with a word
    # character; names that pass are still looked up in the registry.)
    if not re.match(r'\w+', markup):
        _parse_warn('Bad markup language name %r. Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, **options)

    # Is the markup language supported?
    if markup not in _markup_language_registry:
        _parse_warn('Unsupported markup language %r. Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, **options)

    # Get the parse function.
    parse_docstring = _markup_language_registry[markup]

    # If it's a string, then it names a module to import the parse
    # function from.  __import__ with a non-empty fromlist returns the
    # named submodule itself; unlike exec'ing an import statement, the
    # result can be bound to a local on every Python version.
    if isinstance(parse_docstring, basestring):
        module_name = parse_docstring
        try:
            module = __import__(module_name, globals(), locals(),
                                ['parse_docstring'])
            parse_docstring = module.parse_docstring
        except (ImportError, AttributeError) as e:
            # AttributeError stands in for the ImportError that
            # "from module import parse_docstring" raises when the
            # module lacks the function.
            _parse_warn('Error importing %s for markup language %s: %s' %
                        (module_name, markup, e))
            return _parse_as_plaintext(docstring, errors, **options)
        # Cache the imported function so the import happens only once.
        _markup_language_registry[markup] = parse_docstring

    # Keep track of which markup languages have been used so far.
    MARKUP_LANGUAGES_USED.add(markup)

    # Parse the docstring.
    try:
        parsed_docstring = parse_docstring(docstring, errors, **options)
    except KeyboardInterrupt:
        raise
    except Exception as e:
        # In debug mode, let internal parser errors propagate.
        if epydoc.DEBUG: raise
        log.error('Internal error while parsing a docstring: %s; '
                  'treating docstring as plaintext' % e)
        return _parse_as_plaintext(docstring, errors, **options)

    # Check for fatal errors.
    fatal_errors = [err for err in errors if err.is_fatal()]
    if fatal_errors and raise_on_error:
        raise fatal_errors[0]
    if fatal_errors:
        return _parse_as_plaintext(docstring, errors, **options)

    return parsed_docstring

# Only issue each warning once: the keys of this dictionary are the
# warning messages that _parse_warn() has already logged.
_parse_warnings = {}

def _parse_warn(estr):
    """
    Log a warning message.  If the given message has already been
    logged by this function, then do nothing.

    @param estr: The warning message.
    @type estr: C{string}
    """
    # The dictionary is only mutated, never rebound, so no "global"
    # declaration is needed.
    if estr in _parse_warnings:
        return
    _parse_warnings[estr] = 1
    log.warning(estr)
206 207 ################################################## 208 ## ParsedDocstring 209 ##################################################
class ParsedDocstring:
    """
    A standard intermediate representation for parsed docstrings that
    can be used to generate output.  Parsed docstrings are produced by
    markup parsers (such as L{epytext.parse} or L{javadoc.parse}).
    C{ParsedDocstring}s support several kinds of operation:
      - Output generation (L{to_plaintext()}, L{to_html()}, and
        L{to_latex()}).
      - Summarization (L{summary()}).
      - Field extraction (L{split_fields()}).
      - Index term extraction (L{index_terms()}).

    The output generation methods (C{to_M{format}()}) use a
    L{DocstringLinker} to link the docstring output with the rest
    of the documentation that epydoc generates.

    Subclassing
    ===========
    The only method that a subclass is I{required} to implement is
    L{to_plaintext()}; but it is often useful to override the other
    methods.  The default behavior of each method is described below:
      - C{to_I{format}}: Calls C{to_plaintext}, and uses the string it
        returns to generate verbatim output.
      - C{summary}: Returns C{self} (i.e., the entire docstring).
      - C{split_fields}: Returns C{(self, [])} (i.e., extracts no
        fields).
      - C{index_terms}: Returns C{[]} (i.e., extracts no index terms).

    If and when epydoc adds more output formats, new C{to_I{format}}
    methods will be added to this base class; but they will always
    be given a default implementation.
    """

    def split_fields(self, errors=None):
        """
        Split this docstring into its body and its fields.

        @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is
            the main body of this docstring, and C{M{fields}} is a list
            of its fields.
        @rtype: C{(L{ParsedDocstring}, list of L{Field})}
        @param errors: A list where any errors generated during
            splitting will be stored.  If no list is specified, then
            errors will be ignored.  (The default implementation
            generates no errors.)
        @type errors: C{list} of L{ParseError}
        """
        # Default behavior: the whole docstring is the body.
        return self, []

    def summary(self):
        """
        @return: A short summary of this docstring.  Typically, the
            summary consists of the first sentence of the docstring.
        @rtype: L{ParsedDocstring}
        """
        # Default behavior: the whole docstring is the summary.
        return self

    def concatenate(self, other):
        """
        @return: A new parsed docstring containing the concatenation
            of this docstring and C{other}.
        @raise ValueError: If the two parsed docstrings are
            incompatible.  (The default implementation, which wraps
            both in a C{ConcatenatedDocstring}, does not perform this
            check itself.)
        """
        return ConcatenatedDocstring(self, other)

    def __add__(self, other):
        """
        Support C{docstring1 + docstring2} as a shorthand for
        L{concatenate()}.
        """
        return self.concatenate(other)

    def to_html(self, docstring_linker, **options):
        """
        Translate this docstring to HTML.

        @param docstring_linker: An HTML translator for crossreference
            links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: An HTML fragment that encodes this docstring.
        @rtype: C{string}
        """
        # Default behavior: show the plaintext rendering verbatim.
        plaintext = plaintext_to_html(self.to_plaintext(docstring_linker))
        return '<pre class="literalblock">\n%s\n</pre>\n' % plaintext

    def to_latex(self, docstring_linker, **options):
        """
        Translate this docstring to LaTeX.

        @param docstring_linker: A LaTeX translator for crossreference
            links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: A LaTeX fragment that encodes this docstring.
        @rtype: C{string}
        """
        # Default behavior: show the plaintext rendering verbatim.
        plaintext = plaintext_to_latex(self.to_plaintext(docstring_linker))
        return '\\begin{alltt}\n%s\\end{alltt}\n\n' % plaintext

    def to_plaintext(self, docstring_linker, **options):
        """
        Translate this docstring to plaintext.

        @param docstring_linker: A plaintext translator for
            crossreference links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: A plaintext fragment that encodes this docstring.
        @rtype: C{string}
        @raise NotImplementedError: Always, in this base class;
            subclasses are required to override this method.
        """
        raise NotImplementedError('ParsedDocstring.to_plaintext()')

    def index_terms(self):
        """
        @return: The list of index terms that are defined in this
            docstring.  Each of these items will be added to the index
            page of the documentation.
        @rtype: C{list} of C{ParsedDocstring}
        """
        # Default behavior: no index terms.
        return []
333 334 ################################################## 335 ## Concatenated Docstring 336 ##################################################
# NOTE(review): the "class" line (original source line 337) is absent
# from this listing.  The header below is reconstructed from the name
# that the rest of the file instantiates; no base class is assumed --
# confirm against the upstream source.
class ConcatenatedDocstring:
    """
    A parsed docstring made by joining several parsed docstrings end
    to end.  Each operation is delegated to the component docstrings,
    in order, and the results are combined.
    """

    def __init__(self, *parsed_docstrings):
        """
        @param parsed_docstrings: The docstrings to concatenate, in
            output order.
        """
        self._parsed_docstrings = parsed_docstrings

    def split_fields(self, errors=None):
        """
        Split every component docstring into its body and fields.

        @param errors: A list where any errors generated during
            splitting will be stored; it is handed on to each
            component docstring.
        @return: A tuple C{(M{body}, M{fields})}: the concatenation of
            the components' bodies, and the list of all the
            components' fields.
        """
        bodies = []
        fields = []
        for doc in self._parsed_docstrings:
            # Forward the caller's error list so that errors reported
            # by the components are not silently dropped.
            body, doc_fields = doc.split_fields(errors)
            bodies.append(body)
            fields.extend(doc_fields)

        return ConcatenatedDocstring(*bodies), fields

    def summary(self):
        """
        @return: The summary of the first component docstring.
        """
        return self._parsed_docstrings[0].summary()

    def to_html(self, docstring_linker, **options):
        """
        @return: The components' HTML renderings, joined in order.
        """
        return ''.join([doc.to_html(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def to_latex(self, docstring_linker, **options):
        """
        @return: The components' LaTeX renderings, joined in order.
        """
        return ''.join([doc.to_latex(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def to_plaintext(self, docstring_linker, **options):
        """
        @return: The components' plaintext renderings, joined in
            order.
        """
        return ''.join([doc.to_plaintext(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def index_terms(self):
        """
        @return: The index terms of all components, in order.
        """
        terms = []
        for doc in self._parsed_docstrings:
            terms.extend(doc.index_terms())
        return terms
377 378 ################################################## 379 ## Fields 380 ##################################################
class Field:
    """
    A single field of a docstring.  Fields describe specific aspects
    of an object, such as a parameter of a function or the author of
    a module.  A field is made up of three parts:
      - The I{tag}, which says what kind of information the field
        encodes.
      - The I{argument}, which says what object the field describes.
        It is either a C{string} or C{None}.
      - The I{body}, which holds the field's information.

    The tag is lowercased and stripped of surrounding whitespace when
    the field is created; the argument (if any) is stripped.
    """

    def __init__(self, tag, arg, body):
        """
        @param tag: The field's tag; stored lowercased and stripped.
        @param arg: The field's argument, or C{None}; stored stripped.
        @param body: The field's body; stored as given.
        """
        self._tag = tag.lower().strip()
        stripped_arg = arg
        if stripped_arg is not None:
            stripped_arg = stripped_arg.strip()
        self._arg = stripped_arg
        self._body = body

    def tag(self):
        """
        @return: The tag of this field.
        @rtype: C{string}
        """
        return self._tag

    def arg(self):
        """
        @return: The argument of this field; C{None} if it has none.
        @rtype: C{string} or C{None}
        """
        return self._arg

    def body(self):
        """
        @return: The body of this field.
        @rtype: L{ParsedDocstring}
        """
        return self._body

    def __repr__(self):
        """
        @return: A short description showing the tag and, when there
            is one, the argument; the body is elided.
        """
        if self._arg is not None:
            return '<Field @%s %s: ...>' % (self._tag, self._arg)
        return '<Field @%s: ...>' % self._tag
429 430 ################################################## 431 ## Docstring Linker (resolves crossreferences) 432 ##################################################
class DocstringLinker:
    """
    A translator for crossreference links into and out of a
    C{ParsedDocstring}.  C{DocstringLinker} is used by
    C{ParsedDocstring} to convert these crossreference links into
    appropriate output formats.  For example,
    C{ParsedDocstring.to_html} expects a C{DocstringLinker} that
    converts crossreference links to HTML.

    Both methods of this base class raise C{NotImplementedError}.
    """

    def translate_indexterm(self, indexterm):
        """
        Translate an index term to the appropriate output format.  The
        output will typically include a crossreference anchor.

        @type indexterm: L{ParsedDocstring}
        @param indexterm: The index term to translate.
        @rtype: C{string}
        @return: The translated index term.
        @raise NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError('DocstringLinker.translate_indexterm()')

    def translate_identifier_xref(self, identifier, label=None):
        """
        Translate a crossreference link to a Python identifier to the
        appropriate output format.  The output will typically include
        a reference or pointer to the crossreference target.

        @type identifier: C{string}
        @param identifier: The name of the Python identifier that
            should be linked to.
        @type label: C{string} or C{None}
        @param label: The label that should be used for the identifier,
            if it's different from the name of the identifier.
        @rtype: C{string}
        @return: The translated crossreference link.
        @raise NotImplementedError: Always, in this base class.
        """
        # The message names this method (it previously named a
        # non-existent translate_xref()).
        raise NotImplementedError(
            'DocstringLinker.translate_identifier_xref()')
470 471 ################################################## 472 ## ParseError exceptions 473 ################################################## 474
class ParseError(Exception):
    """
    The base class for errors generated while parsing docstrings.

    @ivar _linenum: The line on which the error occurred within the
        docstring.  The linenum of the first line is 0.  C{None} if
        the line is unknown.
    @type _linenum: C{int} or C{None}
    @ivar _offset: The line number where the docstring begins.  This
        offset is added to C{_linenum} when displaying the line number
        of the error.  Default value: 1.
    @type _offset: C{int}
    @ivar _descr: A description of the error.
    @type _descr: C{string}
    @ivar _fatal: True if this is a fatal error.
    @type _fatal: C{boolean}
    """

    def __init__(self, descr, linenum=None, is_fatal=1):
        """
        @type descr: C{string}
        @param descr: A description of the error.
        @type linenum: C{int}
        @param linenum: The line on which the error occurred within
            the docstring.  The linenum of the first line is 0.
        @type is_fatal: C{boolean}
        @param is_fatal: True if this is a fatal error.
        """
        self._descr = descr
        self._linenum = linenum
        self._fatal = is_fatal
        self._offset = 1

    def is_fatal(self):
        """
        @return: true if this is a fatal error.  If an error is fatal,
            then epydoc should ignore the output of the parser, and
            parse the docstring as plaintext.
        @rtype: C{boolean}
        """
        return self._fatal

    def linenum(self):
        """
        @return: The line number on which the error occurred (including
            any offset).  If the line number is unknown, then return
            C{None}.
        @rtype: C{int} or C{None}
        """
        if self._linenum is None:
            return None
        return self._offset + self._linenum

    def set_linenum_offset(self, offset):
        """
        Set the line number offset for this error.  This offset is the
        line number where the docstring begins.  This offset is added
        to C{_linenum} when displaying the line number of the error.

        @param offset: The new line number offset.
        @type offset: C{int}
        @rtype: C{None}
        """
        self._offset = offset

    def descr(self):
        """
        @return: The description of this error.
        @rtype: C{string}
        """
        return self._descr

    def __str__(self):
        """
        Return a string representation of this C{ParseError}.  This
        string contains a description of the error, preceded by the
        line where it occurred if that line is known.

        @return: the informal representation of this C{ParseError}.
        @rtype: C{string}
        """
        if self._linenum is not None:
            return 'Line %s: %s' % (self._linenum+self._offset, self.descr())
        else:
            return self.descr()

    def __repr__(self):
        """
        Return the formal representation of this C{ParseError}.
        C{ParseError}s have formal representations of the form::
           <ParseError on line 12>

        If the line of the error is unknown, then the line number
        offset alone is shown.

        @return: the formal representation of this C{ParseError}.
        @rtype: C{string}
        """
        if self._linenum is None:
            return '<ParseError on line %d>' % self._offset
        else:
            return '<ParseError on line %d>' % (self._linenum+self._offset)

    def __cmp__(self, other):
        """
        Compare two C{ParseError}s, based on their line number.
          - Return -1 if C{self.linenum<other.linenum}
          - Return +1 if C{self.linenum>other.linenum}
          - Return 0 if C{self.linenum==other.linenum}.
        An error whose line number is unknown sorts before any error
        whose line number is known; two such errors compare equal.
        If C{other} is not a C{ParseError}, then -1000 is returned.

        @rtype: C{int}
        """
        if not isinstance(other, ParseError): return -1000
        # Adding the offset to an unknown (None) line number would
        # raise TypeError, so order unknown lines explicitly.
        if self._linenum is None or other._linenum is None:
            return ((self._linenum is not None) -
                    (other._linenum is not None))
        mine = self._linenum + self._offset
        theirs = other._linenum + other._offset
        # Equivalent to cmp(mine, theirs), without needing the
        # builtin.
        return (mine > theirs) - (mine < theirs)
582 583 ################################################## 584 ## Misc helpers 585 ################################################## 586 # These are used by multiple markup parsers 587
def parse_type_of(obj):
    """
    Build a parsed docstring that names the type of an object.

    For an old-style class instance (C{types.InstanceType}), the
    result is a link whose name is the name of the instance's class
    and whose target is the string form of that class.  For any other
    object, the result is the name of C{type(obj)}, marked up as code.

    @param obj: The object whose type should be described.
    @type obj: any
    @return: A C{ParsedDocstring} that encodes the type of the given
        object.
    @rtype: L{ParsedDocstring}
    """
    # This is a bit hackish: the epytext DOM tree is assembled by hand
    # rather than produced by parsing any markup.
    from epydoc.markup.epytext import ParsedEpytextDocstring
    from xml.dom.minidom import Document

    # Skeleton: <epytext><para>...</para></epytext>
    dom = Document()
    root = dom.createElement('epytext')
    paragraph = dom.createElement('para')
    dom.appendChild(root)
    root.appendChild(paragraph)

    if type(obj) is not types.InstanceType:
        # <code>typename</code>
        code_node = dom.createElement('code')
        paragraph.appendChild(code_node)
        type_name = type(obj).__name__
        code_node.appendChild(dom.createTextNode(type_name))
        return ParsedEpytextDocstring(dom)

    # <link><name>ClassName</name><target>str(class)</target></link>
    link_node = dom.createElement('link')
    name_node = dom.createElement('name')
    target_node = dom.createElement('target')
    paragraph.appendChild(link_node)
    link_node.appendChild(name_node)
    link_node.appendChild(target_node)
    instance_class = obj.__class__
    name_node.appendChild(dom.createTextNode(str(instance_class.__name__)))
    target_node.appendChild(dom.createTextNode(str(instance_class)))
    return ParsedEpytextDocstring(dom)
619