1   
  2   
  3   
  4   
  5   
  6   
  7   
  8   
  9   
 10  """ 
 11  Markup language support for docstrings.  Each submodule defines a 
 12  parser for a single markup language.  These parsers convert an 
 13  object's docstring to a L{ParsedDocstring}, a standard intermediate 
 14  representation that can be used to generate output. 
 15  C{ParsedDocstring}s support the following operations: 
 16    - output generation (L{to_plaintext()<ParsedDocstring.to_plaintext>}, 
 17      L{to_html()<ParsedDocstring.to_html>}, and 
 18      L{to_latex()<ParsedDocstring.to_latex>}). 
 19    - Summarization (L{summary()<ParsedDocstring.summary>}). 
 20    - Field extraction (L{split_fields()<ParsedDocstring.split_fields>}). 
 21    - Index term extraction (L{index_terms()<ParsedDocstring.index_terms>}). 
 22   
 23  The L{parse()} function provides a single interface to the 
 24  C{epydoc.markup} package: it takes a docstring and the name of a 
 25  markup language; delegates to the appropriate parser; and returns the 
 26  parsed docstring (along with any errors or warnings that were 
 27  generated). 
 28   
 29  The C{ParsedDocstring} output generation methods (C{to_M{format}()}) 
 30  use a L{DocstringLinker} to link the docstring output with the rest of 
 31  the documentation that epydoc generates.  C{DocstringLinker}s are 
 32  currently responsible for translating two kinds of crossreference: 
 33    - index terms (L{translate_indexterm() 
 34      <DocstringLinker.translate_indexterm>}). 
 35    - identifier crossreferences (L{translate_identifier_xref() 
 36      <DocstringLinker.translate_identifier_xref>}). 
 37   
 38  A parsed docstring's fields can be extracted using the 
 39  L{ParsedDocstring.split_fields()} method.  This method divides a 
 40  docstring into its main body and a list of L{Field}s, each of which 
 41  encodes a single field.  The fields' bodies are encoded as 
 42  C{ParsedDocstring}s. 
 43   
 44  Markup errors are represented using L{ParseError}s.  These exception 
 45  classes record information about the cause, location, and severity of 
 46  each error. 
 47   
 48  @sort: parse, ParsedDocstring, Field, DocstringLinker 
 49  @group Errors and Warnings: ParseError 
 50  @group Utility Functions: parse_type_of 
 51  @var SCRWIDTH: The default width with which text will be wrapped 
 52        when formatting the output of the parser. 
 53  @type SCRWIDTH: C{int} 
 54  @var _parse_warnings: Used by L{_parse_warn}. 
 55  """ 
 56  __docformat__ = 'epytext en' 
 57   
 58  import re, types, sys 
 59  from epydoc import log 
 60  from epydoc.util import plaintext_to_html, plaintext_to_latex 
 61  import epydoc 
 62  from epydoc.compat import * 
 63   
 64   
 65   
 66   
 67   
 68   
 69   
 70   
 71   
 72   
 73   
 74   
 75   
 76   
 77   
 78   
 79   
# Registry of the known markup languages.  Each key is a lowercase
# markup language name.  Each value is either the dotted name of a
# module that provides a C{parse_docstring} function, or a parse
# function itself.  parse() imports a module the first time its markup
# language is requested, and then stores the imported function here in
# place of the module name.
_markup_language_registry = {
    'restructuredtext': 'epydoc.markup.restructuredtext',
    'epytext': 'epydoc.markup.epytext',
    'plaintext': 'epydoc.markup.plaintext',
    'javadoc': 'epydoc.markup.javadoc',
    }
 86   
 88      """ 
 89      Register a new markup language named C{name}, which can be parsed 
 90      by the function C{parse_function}. 
 91   
 92      @param name: The name of the markup language.  C{name} should be a 
 93      simple identifier, such as C{'epytext'} or C{'restructuredtext'}. 
 94      Markup language names are case insensitive. 
 95   
 96      @param parse_function: A function which can be used to parse the 
 97          markup language, and returns a L{ParsedDocstring}.  It should 
 98          have the following signature: 
 99   
100              >>> def parse(s, errors): 
101              ...     'returns a ParsedDocstring' 
102   
103          Where: 
104              - C{s} is the string to parse.  (C{s} will be a unicode 
105                string.) 
106              - C{errors} is a list; any errors that are generated 
107                during docstring parsing should be appended to this 
108                list (as L{ParseError} objects). 
109      """ 
110      _markup_language_registry[name.lower()] = parse_function 
 111   
# The names of the markup languages for which parse() has invoked a
# parser.  A name is added just before its parser is called.
MARKUP_LANGUAGES_USED = set()
113   
def _parse_as_plaintext(docstring, errors, options):
    """
    Parse C{docstring} with the plaintext markup parser.  L{parse()}
    uses this as its fallback whenever the requested markup language
    cannot be used.

    @param docstring: The docstring to encode.
    @param errors: The list that is handed to the plaintext parser
        for recording any errors it generates.
    @type errors: C{list} of L{ParseError}
    @param options: Extra keyword options (as a dictionary) that are
        passed on to the plaintext parser.
    @type options: C{dict}
    @return: Whatever C{epydoc.markup.plaintext.parse_docstring}
        returns for C{docstring}.
    """
    # Imported on demand, at the point where the fallback is needed.
    import epydoc.markup.plaintext as plaintext
    return plaintext.parse_docstring(docstring, errors, **options)

def parse(docstring, markup='plaintext', errors=None, **options):
    """
    Parse the given docstring, and use it to construct a
    C{ParsedDocstring}.  The docstring is rendered as plaintext
    instead if the markup language name is malformed or unregistered,
    if its parser module cannot be imported, if the parser raises an
    unexpected exception (and C{epydoc.DEBUG} is false), or if the
    parser reports a fatal C{ParseError} while C{errors} was supplied.

    @type docstring: C{string}
    @param docstring: The docstring to encode.
    @type markup: C{string}
    @param markup: The name of the markup language that is used by
        the docstring.  If the markup language is not supported, then
        the docstring will be treated as plaintext.  The markup name
        is case-insensitive.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then the first
        fatal error reported by the parser is raised, and non-fatal
        errors are ignored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra keyword options, passed through unchanged
        to the markup language's parse function.
    @rtype: L{ParsedDocstring}
    @return: A L{ParsedDocstring} that encodes the contents of
        C{docstring}.
    @raise ParseError: If C{errors} is C{None} and the parser
        reports a fatal error.
    """
    # Without a caller-supplied list, fatal errors are raised rather
    # than collected; a private list still receives them meanwhile.
    raise_on_error = (errors is None)
    if errors is None: errors = []

    # Markup language names are case-insensitive.
    markup = markup.lower()

    # Reject names that do not even start with a word character.
    if not re.match(r'\w+', markup):
        _parse_warn('Bad markup language name %r.  Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, options)

    # Is the markup language registered?
    if markup not in _markup_language_registry:
        _parse_warn('Unsupported markup language %r.  Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, options)

    # Look up the parse function for this markup language.
    parse_docstring = _markup_language_registry[markup]

    # A string entry names a module that has not been imported yet:
    # import it, fetch its parse_docstring function, and cache that
    # function in the registry for subsequent calls.
    if isinstance(parse_docstring, basestring):
        module_name = parse_docstring
        try:
            # A non-empty fromlist makes __import__ return the named
            # (leaf) module rather than the top-level package.  This
            # replaces an exec'd "from ... import" statement, which
            # cannot rebind a function local on Python 3.
            module = __import__(module_name, globals(), locals(),
                                ['parse_docstring'])
            parse_docstring = getattr(module, 'parse_docstring')
        except (ImportError, AttributeError):
            # AttributeError covers a module that imports cleanly but
            # lacks parse_docstring (the "from ... import" form used
            # to report that case as an ImportError).
            # sys.exc_info() is used instead of binding the exception
            # in the except clause, because it is spelled the same
            # way on every Python version.
            e = sys.exc_info()[1]
            _parse_warn('Error importing %s for markup language %s: %s' %
                        (module_name, markup, e))
            return _parse_as_plaintext(docstring, errors, options)
        _markup_language_registry[markup] = parse_docstring

    # Record that this markup language has been used.
    MARKUP_LANGUAGES_USED.add(markup)

    # Run the parser.
    try: parsed_docstring = parse_docstring(docstring, errors, **options)
    except KeyboardInterrupt: raise
    except Exception:
        if epydoc.DEBUG: raise
        e = sys.exc_info()[1]
        log.error('Internal error while parsing a docstring: %s; '
                  'treating docstring as plaintext' % e)
        return _parse_as_plaintext(docstring, errors, options)

    # A fatal error means the parser's output should not be used:
    # either raise it, or fall back to plaintext.
    fatal_errors = [e for e in errors if e.is_fatal()]
    if fatal_errors:
        if raise_on_error: raise fatal_errors[0]
        return _parse_as_plaintext(docstring, errors, options)

    return parsed_docstring
 194   
195   
196  _parse_warnings = {} 
206   
207   
208   
209   
211      """ 
212      A standard intermediate representation for parsed docstrings that 
213      can be used to generate output.  Parsed docstrings are produced by 
214      markup parsers (such as L{epytext.parse} or L{javadoc.parse}). 
215      C{ParsedDocstring}s support several kinds of operation:     
216        - output generation (L{to_plaintext()}, L{to_html()}, and 
217          L{to_latex()}). 
218        - Summarization (L{summary()}). 
219        - Field extraction (L{split_fields()}). 
220        - Index term extraction (L{index_terms()}). 
221   
222      The output generation methods (C{to_M{format}()}) use a 
223      L{DocstringLinker} to link the docstring output with the rest 
224      of the documentation that epydoc generates. 
225   
226      Subclassing 
227      =========== 
228      The only method that a subclass is I{required} to implement is 
229      L{to_plaintext()}; but it is often useful to override the other 
230      methods.  The default behavior of each method is described below: 
231        - C{to_I{format}}: Calls C{to_plaintext}, and uses the string it 
232          returns to generate verbatim output. 
233        - C{summary}: Returns C{self} (i.e., the entire docstring). 
234        - C{split_fields}: Returns C{(self, [])} (i.e., extracts no 
235          fields). 
236        - C{index_terms}: Returns C{[]} (i.e., extracts no index terms). 
237   
238      If and when epydoc adds more output formats, new C{to_I{format}} 
239      methods will be added to this base class; but they will always 
240      be given a default implementation. 
241      """ 
243          """ 
244          Split this docstring into its body and its fields. 
245           
246          @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is 
247              the main body of this docstring, and C{M{fields}} is a list 
248              of its fields. 
249          @rtype: C{(L{ParsedDocstring}, list of L{Field})} 
250          @param errors: A list where any errors generated during 
251              splitting will be stored.  If no list is specified, then 
252              errors will be ignored. 
253          @type errors: C{list} of L{ParseError} 
254          """ 
255           
256          return self, [] 
257   
259          """ 
260          @return: A short summary of this docstring.  Typically, the 
261              summary consists of the first sentence of the docstring. 
262          @rtype: L{ParsedDocstring} 
263          """ 
264           
265          return self 
266   
268          """ 
269          @return: A new parsed docstring containing the concatenation 
270              of this docstring and C{other}. 
271          @raise ValueError: If the two parsed docstrings are 
272              incompatible. 
273          """ 
274          return ConcatenatedDocstring(self, other) 
 275   
277   
278 -    def to_html(self, docstring_linker, **options): 
 279          """ 
280          Translate this docstring to HTML. 
281   
282          @param docstring_linker: An HTML translator for crossreference 
283              links into and out of the docstring. 
284          @type docstring_linker: L{DocstringLinker} 
285          @param options: Any extra options for the output.  Unknown 
286              options are ignored. 
287          @return: An HTML fragment that encodes this docstring. 
288          @rtype: C{string} 
289          """ 
290           
291          plaintext = plaintext_to_html(self.to_plaintext(docstring_linker)) 
292          return '<pre class="literalblock">\n%s\n</pre>\n' % plaintext 
 293   
294 -    def to_latex(self, docstring_linker, **options): 
 295          """ 
296          Translate this docstring to LaTeX. 
297           
298          @param docstring_linker: A LaTeX translator for crossreference 
299              links into and out of the docstring. 
300          @type docstring_linker: L{DocstringLinker} 
301          @param options: Any extra options for the output.  Unknown 
302              options are ignored. 
303          @return: A LaTeX fragment that encodes this docstring. 
304          @rtype: C{string} 
305          """ 
306           
307          plaintext = plaintext_to_latex(self.to_plaintext(docstring_linker)) 
308          return '\\begin{alltt}\n%s\\end{alltt}\n\n' % plaintext 
 309   
310 -    def to_plaintext(self, docstring_linker, **options): 
 311          """ 
312          Translate this docstring to plaintext. 
313           
314          @param docstring_linker: A plaintext translator for 
315              crossreference links into and out of the docstring. 
316          @type docstring_linker: L{DocstringLinker} 
317          @param options: Any extra options for the output.  Unknown 
318              options are ignored. 
319          @return: A plaintext fragment that encodes this docstring. 
320          @rtype: C{string} 
321          """ 
322          raise NotImplementedError, 'ParsedDocstring.to_plaintext()' 
323   
325          """ 
326          @return: The list of index terms that are defined in this 
327              docstring.  Each of these items will be added to the index 
328              page of the documentation. 
329          @rtype: C{list} of C{ParsedDocstring} 
330          """ 
331           
332          return [] 
 333   
334   
335   
336   
    def __init__(self, *parsed_docstrings):
        """
        @param parsed_docstrings: The parsed docstrings to combine,
            given in the order in which their output should appear.
        """
        # Kept as the tuple of positional arguments; the output methods
        # iterate over it in order.
        self._parsed_docstrings = parsed_docstrings
340           
342          bodies = [] 
343          fields = [] 
344          for doc in self._parsed_docstrings: 
345              b,f = doc.split_fields() 
346              bodies.append(b) 
347              fields.extend(f) 
348   
349          return ConcatenatedDocstring(*bodies), fields 
 350   
352          return self._parsed_docstrings[0].summary() 
 353   
354 -    def to_html(self, docstring_linker, **options): 
 355          htmlstring = '' 
356          for doc in self._parsed_docstrings: 
357              htmlstring += doc.to_html(docstring_linker, **options) 
358          return htmlstring 
 359   
360 -    def to_latex(self, docstring_linker, **options): 
 361          latexstring = '' 
362          for doc in self._parsed_docstrings: 
363              latexstring += doc.to_latex(docstring_linker, **options) 
364          return latexstring 
 365   
366 -    def to_plaintext(self, docstring_linker, **options): 
 367          textstring = '' 
368          for doc in self._parsed_docstrings: 
369              textstring += doc.to_plaintext(docstring_linker, **options) 
370          return textstring 
 371   
373          terms = [] 
374          for doc in self._parsed_docstrings: 
375              terms += doc.index_terms() 
376          return terms 
 382      """ 
383      The contents of a docstring's field.  Docstring fields are used 
384      to describe specific aspects of an object, such as a parameter of 
385      a function or the author of a module.  Each field consists of a 
386      tag, an optional argument, and a body: 
387        - The tag specifies the type of information that the field 
388          encodes. 
389        - The argument specifies the object that the field describes. 
390          The argument may be C{None} or a C{string}. 
391        - The body contains the field's information. 
392   
393      Tags are automatically downcased and stripped; and arguments are 
394      automatically stripped. 
395      """ 
397          self._tag = tag.lower().strip() 
398          if arg is None: self._arg = None 
399          else: self._arg = arg.strip() 
400          self._body = body 
 401   
403          """ 
404          @return: This field's tag. 
405          @rtype: C{string} 
406          """ 
407          return self._tag 
408   
410          """ 
411          @return: This field's argument, or C{None} if this field has 
412              no argument. 
413          @rtype: C{string} or C{None} 
414          """ 
415          return self._arg 
416   
418          """ 
419          @return: This field's body. 
420          @rtype: L{ParsedDocstring} 
421          """ 
422          return self._body 
423   
425          if self._arg is None: 
426              return '<Field @%s: ...>' % self._tag 
427          else: 
428              return '<Field @%s %s: ...>' % (self._tag, self._arg) 
 429   
430   
431   
432   
434      """ 
435      A translator for crossreference links into and out of a 
436      C{ParsedDocstring}.  C{DocstringLinker} is used by 
437      C{ParsedDocstring} to convert these crossreference links into 
438      appropriate output formats.  For example, 
439      C{ParsedDocstring.to_html} expects a C{DocstringLinker} that 
440      converts crossreference links to HTML. 
441      """ 
443          """ 
444          Translate an index term to the appropriate output format.  The 
445          output will typically include a crossreference anchor. 
446   
447          @type indexterm: L{ParsedDocstring} 
448          @param indexterm: The index term to translate. 
449          @rtype: C{string} 
450          @return: The translated index term. 
451          """ 
452          raise NotImplementedError, 'DocstringLinker.translate_indexterm()' 
453   
455          """ 
456          Translate a crossreference link to a Python identifier to the 
457          appropriate output format.  The output will typically include 
458          a reference or pointer to the crossreference target. 
459   
460          @type identifier: C{string} 
461          @param identifier: The name of the Python identifier that 
462              should be linked to. 
463          @type label: C{string} or C{None} 
464          @param label: The label that should be used for the identifier, 
465              if it's different from the name of the identifier. 
466          @rtype: C{string} 
467          @return: The translated crossreference link. 
468          """ 
469          raise NotImplementedError, 'DocstringLinker.translate_xref()' 
 476      """ 
477      The base class for errors generated while parsing docstrings. 
478   
479      @ivar _linenum: The line on which the error occurred within the 
480          docstring.  The linenum of the first line is 0. 
481      @type _linenum: C{int} 
482      @ivar _offset: The line number where the docstring begins.  This 
483          offset is added to C{_linenum} when displaying the line number 
484          of the error.  Default value: 1. 
485      @type _offset: C{int} 
486      @ivar _descr: A description of the error. 
487      @type _descr: C{string} 
488      @ivar _fatal: True if this is a fatal error. 
489      @type _fatal: C{boolean} 
490      """ 
    def __init__(self, descr, linenum=None, is_fatal=1):
        """
        Create a new parse error with the given description, line
        number, and severity.

        @type descr: C{string}
        @param descr: A description of the error.
        @type linenum: C{int}
        @param linenum: The line on which the error occurred within
            the docstring.  The linenum of the first line is 0.
            C{None} (the default) leaves the line number unset.
        @type is_fatal: C{boolean}
        @param is_fatal: True if this is a fatal error.
        """
        self._descr = descr
        self._linenum = linenum
        self._fatal = is_fatal
        # The line number offset always starts out as 1.
        self._offset = 1
 505                    
507          """ 
508          @return: true if this is a fatal error.  If an error is fatal, 
509              then epydoc should ignore the output of the parser, and 
510              parse the docstring as plaintext. 
511          @rtype: C{boolean} 
512          """ 
513          return self._fatal 
514   
516          """ 
517          @return: The line number on which the error occurred (including 
518          any offset).  If the line number is unknown, then return 
519          C{None}. 
520          @rtype: C{int} or C{None} 
521          """ 
522          if self._linenum is None: return None 
523          else: return self._offset + self._linenum 
524   
526          """ 
527          Set the line number offset for this error.  This offset is the 
528          line number where the docstring begins.  This offset is added 
529          to C{_linenum} when displaying the line number of the error. 
530   
531          @param offset: The new line number offset. 
532          @type offset: C{int} 
533          @rtype: C{None} 
534          """ 
535          self._offset = offset 
536   
539       
541          """ 
542          Return a string representation of this C{ParseError}.  This 
543          multi-line string contains a description of the error, and 
544          specifies where it occurred. 
545           
546          @return: the informal representation of this C{ParseError}. 
547          @rtype: C{string} 
548          """ 
549          if self._linenum is not None: 
550              return 'Line %s: %s' % (self._linenum+self._offset, self.descr()) 
551          else: 
552              return self.descr() 
 553       
555          """ 
556          Return the formal representation of this C{ParseError}. 
557          C{ParseError}s have formal representations of the form:: 
558             <ParseError on line 12> 
559   
560          @return: the formal representation of this C{ParseError}. 
561          @rtype: C{string} 
562          """ 
563          if self._linenum is None: 
564              return '<ParseError on line %d' % self._offset 
565          else: 
566              return '<ParseError on line %d>' % (self._linenum+self._offset) 
567   
569          """ 
570          Compare two C{ParseError}s, based on their line number. 
571            - Return -1 if C{self.linenum<other.linenum} 
572            - Return +1 if C{self.linenum>other.linenum} 
573            - Return 0 if C{self.linenum==other.linenum}. 
574          The return value is undefined if C{other} is not a 
575          ParseError. 
576   
577          @rtype: C{int} 
578          """ 
579          if not isinstance(other, ParseError): return -1000 
580          return cmp(self._linenum+self._offset, 
581                     other._linenum+other._offset)