epydoc.markup

  1  # 
  2  # epydoc package file 
  3  # 
  4  # A python documentation Module 
  5  # Edward Loper 
  6  # 
  7  # $Id: __init__.py 1209 2006-04-10 13:23:24Z edloper $ 
  8  # 
  9   
 10  """ 
 11  Markup language support for docstrings.  Each submodule defines a 
 12  parser for a single markup language.  These parsers convert an 
 13  object's docstring to a L{ParsedDocstring}, a standard intermediate 
 14  representation that can be used to generate output. 
 15  C{ParsedDocstring}s support the following operations: 
 16    - output generation (L{to_plaintext()<ParsedDocstring.to_plaintext>}, 
 17      L{to_html()<ParsedDocstring.to_html>}, and 
 18      L{to_latex()<ParsedDocstring.to_latex>}). 
 19    - Summarization (L{summary()<ParsedDocstring.summary>}). 
 20    - Field extraction (L{split_fields()<ParsedDocstring.split_fields>}). 
 21    - Index term extraction (L{index_terms()<ParsedDocstring.index_terms>}). 
 22   
 23  The L{parse()} function provides a single interface to the 
 24  C{epydoc.markup} package: it takes a docstring and the name of a 
 25  markup language; delegates to the appropriate parser; and returns the 
 26  parsed docstring (along with any errors or warnings that were 
 27  generated). 
 28   
 29  The C{ParsedDocstring} output generation methods (C{to_M{format}()}) 
 30  use a L{DocstringLinker} to link the docstring output with the rest of 
 31  the documentation that epydoc generates.  C{DocstringLinker}s are 
 32  currently responsible for translating two kinds of crossreference: 
 33    - index terms (L{translate_indexterm() 
 34      <DocstringLinker.translate_indexterm>}). 
 35    - identifier crossreferences (L{translate_identifier_xref() 
 36      <DocstringLinker.translate_identifier_xref>}). 
 37   
 38  A parsed docstring's fields can be extracted using the 
 39  L{ParsedDocstring.split_fields()} method.  This method divides a 
 40  docstring into its main body and a list of L{Field}s, each of which 
 41  encodes a single field.  The fields' bodies are encoded as 
 42  C{ParsedDocstring}s. 
 43   
 44  Markup errors are represented using L{ParseError}s.  These exception 
 45  classes record information about the cause, location, and severity of 
 46  each error. 
 47   
 48  @sort: parse, ParsedDocstring, Field, DocstringLinker 
 49  @group Errors and Warnings: ParseError 
 50  @group Utility Functions: parse_type_of 
 51  @var SCRWIDTH: The default width with which text will be wrapped 
 52        when formatting the output of the parser. 
 53  @type SCRWIDTH: C{int} 
 54  @var _parse_warnings: Used by L{_parse_warn}. 
 55  """ 
 56  __docformat__ = 'epytext en' 
 57   
 58  import re, types, sys 
 59  from epydoc import log 
 60  from epydoc.util import plaintext_to_html, plaintext_to_latex 
 61  import epydoc 
 62  from epydoc.compat import * 
 63   
 64  ################################################## 
 65  ## Contents 
 66  ################################################## 
 67  # 
 68  # 1. parse() dispatcher 
 69  # 2. ParsedDocstring abstract base class 
 70  # 3. Field class 
 71  # 4. Docstring Linker 
 72  # 5. ParseError exceptions 
 73  # 6. Misc helpers 
 74  # 
 75   
 76  ################################################## 
 77  ## Dispatcher 
 78  ################################################## 
 79   
 80  _markup_language_registry = { 
 81      'restructuredtext': 'epydoc.markup.restructuredtext', 
 82      'epytext': 'epydoc.markup.epytext', 
 83      'plaintext': 'epydoc.markup.plaintext', 
 84      'javadoc': 'epydoc.markup.javadoc', 
 85      } 
 86   
 87 -def register_markup_language(name, parse_function): 
 88      """ 
 89      Register a new markup language named C{name}, which can be parsed 
 90      by the function C{parse_function}. 
 91   
 92      @param name: The name of the markup language.  C{name} should be a 
 93      simple identifier, such as C{'epytext'} or C{'restructuredtext'}. 
 94      Markup language names are case insensitive. 
 95   
 96      @param parse_function: A function which can be used to parse the 
 97          markup language, and returns a L{ParsedDocstring}.  It should 
 98          have the following signature: 
 99   
100              >>> def parse(s, errors): 
101              ...     'returns a ParsedDocstring' 
102   
103          Where: 
104              - C{s} is the string to parse.  (C{s} will be a unicode 
105                string.) 
106              - C{errors} is a list; any errors that are generated 
107                during docstring parsing should be appended to this 
108                list (as L{ParseError} objects). 
109      """ 
110      _markup_language_registry[name.lower()] = parse_function 
111   
# Names of the markup languages that parse() has dispatched to so far.
MARKUP_LANGUAGES_USED = set()


def _parse_as_plaintext(docstring, errors, options):
    """
    Fallback used by L{parse()}: parse C{docstring} with the plaintext
    markup parser.

    @param docstring: The docstring to encode.
    @param errors: The list that the plaintext parser should append
        its errors to.
    @param options: A dictionary of extra keyword options, forwarded
        unchanged to the plaintext parser.
    @return: Whatever C{epydoc.markup.plaintext.parse_docstring}
        returns.
    """
    # The import is deliberately local, as in the original code
    # (presumably to avoid a circular import at module load time --
    # TODO confirm).
    import epydoc.markup.plaintext as plaintext
    return plaintext.parse_docstring(docstring, errors, **options)


def parse(docstring, markup='plaintext', errors=None, **options):
    """
    Parse the given docstring, and use it to construct a
    C{ParsedDocstring}.  If any fatal C{ParseError}s are encountered
    while parsing the docstring (and an C{errors} list was given),
    then the docstring will be rendered as plaintext, instead.

    @type docstring: C{string}
    @param docstring: The docstring to encode.
    @type markup: C{string}
    @param markup: The name of the markup language that is used by
        the docstring.  If the markup language is not supported, then
        the docstring will be treated as plaintext.  The markup name
        is case-insensitive.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra keyword options; they are passed through
        unchanged to the markup language's parse function.
    @rtype: L{ParsedDocstring}
    @return: A L{ParsedDocstring} that encodes the contents of
        C{docstring}.
    @raise ParseError: If C{errors} is C{None} and a fatal error is
        encountered while parsing.
    """
    # Remember whether the caller asked for errors to be collected;
    # if not, the first fatal error is raised at the end.
    raise_on_error = (errors is None)
    if errors is None:
        errors = []

    # Normalize the markup language name.
    markup = markup.lower()

    # Is the markup language valid?
    # NOTE(review): re.match only anchors at the start, so this accepts
    # any name that merely *begins* with a word character.  Left as-is
    # because names containing other characters may have been
    # registered via register_markup_language().
    if not re.match(r'\w+', markup):
        _parse_warn('Bad markup language name %r.  Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, options)

    # Is the markup language supported?
    if markup not in _markup_language_registry:
        _parse_warn('Unsupported markup language %r.  Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, options)

    # Get the parse function.
    parse_docstring = _markup_language_registry[markup]

    # If it's a string, then it names a module to import the
    # parse_docstring function from.
    if isinstance(parse_docstring, basestring):
        module_name = parse_docstring
        try:
            # A non-empty fromlist makes __import__ return the named
            # (leaf) module rather than the top-level package.
            module = __import__(module_name, globals(), locals(),
                                ['parse_docstring'])
            parse_docstring = getattr(module, 'parse_docstring')
        except (ImportError, AttributeError):
            # sys.exc_info() is used instead of "except ..., e" /
            # "except ... as e" so that this code is valid on every
            # Python version.
            e = sys.exc_info()[1]
            _parse_warn('Error importing %s for markup language %s: %s' %
                        (module_name, markup, e))
            return _parse_as_plaintext(docstring, errors, options)
        # Cache the imported function so the import happens only once.
        _markup_language_registry[markup] = parse_docstring

    # Keep track of which markup languages have been used so far.
    MARKUP_LANGUAGES_USED.add(markup)

    # Parse the docstring.
    try:
        parsed_docstring = parse_docstring(docstring, errors, **options)
    except KeyboardInterrupt:
        raise
    except Exception:
        # In debug mode, let internal parser errors propagate.
        if epydoc.DEBUG: raise
        e = sys.exc_info()[1]
        log.error('Internal error while parsing a docstring: %s; '
                  'treating docstring as plaintext' % e)
        return _parse_as_plaintext(docstring, errors, options)

    # Check for fatal errors.
    fatal_errors = [error for error in errors if error.is_fatal()]
    if fatal_errors:
        if raise_on_error:
            raise fatal_errors[0]
        return _parse_as_plaintext(docstring, errors, options)

    return parsed_docstring
194   
# Messages that _parse_warn() has already reported.  It is consulted so
# that each distinct warning is only issued once.
_parse_warnings = {}

def _parse_warn(estr):
    """
    Log a warning message.  If the identical message has already been
    logged by an earlier call, then do nothing.

    @param estr: The warning message.
    @type estr: C{string}
    """
    if estr in _parse_warnings:
        return
    _parse_warnings[estr] = 1
    log.warning(estr)
206   
207  ################################################## 
208  ## ParsedDocstring 
209  ################################################## 
class ParsedDocstring:
    """
    A standard intermediate representation for parsed docstrings that
    can be used to generate output.  Parsed docstrings are produced by
    markup parsers (such as L{epytext.parse} or L{javadoc.parse}).
    C{ParsedDocstring}s support several kinds of operation:
      - output generation (L{to_plaintext()}, L{to_html()}, and
        L{to_latex()}).
      - Summarization (L{summary()}).
      - Field extraction (L{split_fields()}).
      - Index term extraction (L{index_terms()}).

    The output generation methods (C{to_M{format}()}) use a
    L{DocstringLinker} to link the docstring output with the rest
    of the documentation that epydoc generates.

    Subclassing
    ===========
    The only method that a subclass is I{required} to implement is
    L{to_plaintext()}; but it is often useful to override the other
    methods.  The default behavior of each method is described below:
      - C{to_I{format}}: Calls C{to_plaintext}, and uses the string it
        returns to generate verbatim output.
      - C{summary}: Returns C{self} (i.e., the entire docstring).
      - C{split_fields}: Returns C{(self, [])} (i.e., extracts no
        fields).
      - C{index_terms}: Returns C{[]} (i.e., extracts no index terms).

    If and when epydoc adds more output formats, new C{to_I{format}}
    methods will be added to this base class; but they will always
    be given a default implementation.
    """
    def split_fields(self, errors=None):
        """
        Split this docstring into its body and its fields.

        @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is
            the main body of this docstring, and C{M{fields}} is a list
            of its fields.
        @rtype: C{(L{ParsedDocstring}, list of L{Field})}
        @param errors: A list where any errors generated during
            splitting will be stored.  If no list is specified, then
            errors will be ignored.
        @type errors: C{list} of L{ParseError}
        """
        # Default behavior: the whole docstring is the body, and there
        # are no fields (so no errors can be generated either).
        return self, []

    def summary(self):
        """
        @return: A short summary of this docstring.  Typically, the
            summary consists of the first sentence of the docstring.
        @rtype: L{ParsedDocstring}
        """
        # Default behavior: the entire docstring is its own summary.
        return self

    def concatenate(self, other):
        """
        @param other: The parsed docstring to append to this one.
        @return: A new parsed docstring containing the concatenation
            of this docstring and C{other}.
        @raise ValueError: If the two parsed docstrings are
            incompatible.
        """
        return ConcatenatedDocstring(self, other)

    def __add__(self, other):
        """
        Support C{self + other} as shorthand for
        C{self.concatenate(other)}.
        """
        return self.concatenate(other)

    def to_html(self, docstring_linker, **options):
        """
        Translate this docstring to HTML.

        @param docstring_linker: An HTML translator for crossreference
            links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: An HTML fragment that encodes this docstring.
        @rtype: C{string}
        """
        # Default behavior: escape the plaintext rendering and show it
        # verbatim in a literal block.
        plaintext = plaintext_to_html(self.to_plaintext(docstring_linker))
        return '<pre class="literalblock">\n%s\n</pre>\n' % plaintext

    def to_latex(self, docstring_linker, **options):
        """
        Translate this docstring to LaTeX.

        @param docstring_linker: A LaTeX translator for crossreference
            links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: A LaTeX fragment that encodes this docstring.
        @rtype: C{string}
        """
        # Default behavior: escape the plaintext rendering and show it
        # verbatim in an alltt environment.
        plaintext = plaintext_to_latex(self.to_plaintext(docstring_linker))
        return '\\begin{alltt}\n%s\\end{alltt}\n\n' % plaintext

    def to_plaintext(self, docstring_linker, **options):
        """
        Translate this docstring to plaintext.  Subclasses must
        override this method.

        @param docstring_linker: A plaintext translator for
            crossreference links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: A plaintext fragment that encodes this docstring.
        @rtype: C{string}
        @raise NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError('ParsedDocstring.to_plaintext()')

    def index_terms(self):
        """
        @return: The list of index terms that are defined in this
            docstring.  Each of these items will be added to the index
            page of the documentation.
        @rtype: C{list} of C{ParsedDocstring}
        """
        # Default behavior: no index terms.
        return []
333   
334  ################################################## 
335  ## Concatenated Docstring 
336  ################################################## 
class ConcatenatedDocstring:
    """
    A sequence of parsed docstrings that is treated as one docstring.
    Every operation is delegated to the component docstrings, in
    order, and the individual results are combined.
    """
    def __init__(self, *parsed_docstrings):
        """
        @param parsed_docstrings: The docstrings to concatenate, in
            the order in which they should appear.
        """
        self._parsed_docstrings = parsed_docstrings

    def split_fields(self, errors=None):
        """
        Split every component docstring into its body and its fields.

        @param errors: A list where any errors generated during
            splitting will be stored; it is handed on to each
            component's C{split_fields()}.
        @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is
            the concatenation of the components' bodies and
            C{M{fields}} is the list of all of their fields.
        """
        bodies = []
        fields = []
        for doc in self._parsed_docstrings:
            # Forward the caller's error list so that errors found
            # while splitting a component are not silently lost.
            body, doc_fields = doc.split_fields(errors)
            bodies.append(body)
            fields.extend(doc_fields)

        return ConcatenatedDocstring(*bodies), fields

    def summary(self):
        """
        @return: The summary of the first component docstring; or
            C{self} if there are no components.
        """
        if not self._parsed_docstrings:
            return self
        return self._parsed_docstrings[0].summary()

    def concatenate(self, other):
        """
        @param other: The parsed docstring to append to this one.
        @return: A new parsed docstring containing the concatenation
            of this docstring and C{other}.
        """
        return ConcatenatedDocstring(self, other)

    def __add__(self, other):
        """
        Support C{self + other} as shorthand for
        C{self.concatenate(other)}.
        """
        return self.concatenate(other)

    def to_html(self, docstring_linker, **options):
        """
        @return: The components' HTML renderings, joined together in
            order.
        """
        return ''.join([doc.to_html(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def to_latex(self, docstring_linker, **options):
        """
        @return: The components' LaTeX renderings, joined together in
            order.
        """
        return ''.join([doc.to_latex(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def to_plaintext(self, docstring_linker, **options):
        """
        @return: The components' plaintext renderings, joined together
            in order.
        """
        return ''.join([doc.to_plaintext(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def index_terms(self):
        """
        @return: A list containing the index terms of every component
            docstring, in order.
        """
        terms = []
        for doc in self._parsed_docstrings:
            terms += doc.index_terms()
        return terms
377       
378  ################################################## 
379  ## Fields 
380  ################################################## 
class Field:
    """
    A single field of a docstring.  Fields describe specific aspects
    of an object, such as a parameter of a function or the author of
    a module.  A field is made up of three parts:
      - The tag, which says what type of information the field
        encodes.
      - The argument, which says what object the field describes.
        It is either C{None} or a C{string}.
      - The body, which holds the field's information.

    The tag is converted to lower case and stripped of surrounding
    whitespace; the argument (if any) is stripped as well.
    """
    def __init__(self, tag, arg, body):
        # Normalize the argument first; a missing argument stays None.
        if arg is not None:
            arg = arg.strip()
        self._tag = tag.lower().strip()
        self._arg = arg
        self._body = body

    def tag(self):
        """
        @return: The (normalized) tag of this field.
        @rtype: C{string}
        """
        return self._tag

    def arg(self):
        """
        @return: The argument of this field; C{None} if the field has
            no argument.
        @rtype: C{string} or C{None}
        """
        return self._arg

    def body(self):
        """
        @return: The body of this field.
        @rtype: L{ParsedDocstring}
        """
        return self._body

    def __repr__(self):
        if self._arg is not None:
            return '<Field @%s %s: ...>' % (self._tag, self._arg)
        return '<Field @%s: ...>' % self._tag
429   
430  ################################################## 
431  ## Docstring Linker (resolves crossreferences) 
432  ################################################## 
class DocstringLinker:
    """
    A translator for crossreference links into and out of a
    C{ParsedDocstring}.  C{DocstringLinker} is used by
    C{ParsedDocstring} to convert these crossreference links into
    appropriate output formats.  For example,
    C{ParsedDocstring.to_html} expects a C{DocstringLinker} that
    converts crossreference links to HTML.

    Both translation methods raise C{NotImplementedError} in this
    base class; subclasses are expected to override them.
    """
    def translate_indexterm(self, indexterm):
        """
        Translate an index term to the appropriate output format.  The
        output will typically include a crossreference anchor.

        @type indexterm: L{ParsedDocstring}
        @param indexterm: The index term to translate.
        @rtype: C{string}
        @return: The translated index term.
        @raise NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError('DocstringLinker.translate_indexterm()')

    def translate_identifier_xref(self, identifier, label=None):
        """
        Translate a crossreference link to a Python identifier to the
        appropriate output format.  The output will typically include
        a reference or pointer to the crossreference target.

        @type identifier: C{string}
        @param identifier: The name of the Python identifier that
            should be linked to.
        @type label: C{string} or C{None}
        @param label: The label that should be used for the identifier,
            if it's different from the name of the identifier.
        @rtype: C{string}
        @return: The translated crossreference link.
        @raise NotImplementedError: Always, in this base class.
        """
        # The message names this method; it previously referred to a
        # non-existent "translate_xref()".
        raise NotImplementedError(
            'DocstringLinker.translate_identifier_xref()')
Source Code for Package epydoc.markup