1
2
3
4
5
6
7
8
9
10 """
11 Markup language support for docstrings. Each submodule defines a
12 parser for a single markup language. These parsers convert an
13 object's docstring to a L{ParsedDocstring}, a standard intermediate
14 representation that can be used to generate output.
15 C{ParsedDocstring}s support the following operations:
16 - output generation (L{to_plaintext()<ParsedDocstring.to_plaintext>},
17 L{to_html()<ParsedDocstring.to_html>}, and
18 L{to_latex()<ParsedDocstring.to_latex>}).
19 - Summarization (L{summary()<ParsedDocstring.summary>}).
20 - Field extraction (L{split_fields()<ParsedDocstring.split_fields>}).
21 - Index term extraction (L{index_terms()<ParsedDocstring.index_terms>}).
22
23 The L{parse()} function provides a single interface to the
24 C{epydoc.markup} package: it takes a docstring and the name of a
25 markup language; delegates to the appropriate parser; and returns the
26 parsed docstring (along with any errors or warnings that were
27 generated).
28
29 The C{ParsedDocstring} output generation methods (C{to_M{format}()})
30 use a L{DocstringLinker} to link the docstring output with the rest of
31 the documentation that epydoc generates. C{DocstringLinker}s are
32 currently responsible for translating two kinds of crossreference:
33 - index terms (L{translate_indexterm()
34 <DocstringLinker.translate_indexterm>}).
35 - identifier crossreferences (L{translate_identifier_xref()
36 <DocstringLinker.translate_identifier_xref>}).
37
38 A parsed docstring's fields can be extracted using the
39 L{ParsedDocstring.split_fields()} method. This method divides a
40 docstring into its main body and a list of L{Field}s, each of which
41 encodes a single field. The fields' bodies are encoded as
42 C{ParsedDocstring}s.
43
44 Markup errors are represented using L{ParseError}s. These exception
45 classes record information about the cause, location, and severity of
46 each error.
47
48 @sort: parse, ParsedDocstring, Field, DocstringLinker
49 @group Errors and Warnings: ParseError
50 @group Utility Functions: parse_type_of
51 @var SCRWIDTH: The default width with which text will be wrapped
52 when formatting the output of the parser.
53 @type SCRWIDTH: C{int}
54 @var _parse_warnings: Used by L{_parse_warn}.
55 """
56 __docformat__ = 'epytext en'
57
58 import re, types, sys
59 from epydoc import log
60 from epydoc.util import plaintext_to_html, plaintext_to_latex
61 import epydoc
62 from epydoc.compat import *
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 _markup_language_registry = {
81 'restructuredtext': 'epydoc.markup.restructuredtext',
82 'epytext': 'epydoc.markup.epytext',
83 'plaintext': 'epydoc.markup.plaintext',
84 'javadoc': 'epydoc.markup.javadoc',
85 }
86
# NOTE(review): the ``def`` line was missing from this listing; the
# parameter names come from the body, the function name is inferred --
# confirm against callers.
def register_markup_language(name, parse_function):
    """
    Register a new markup language named C{name}, which can be parsed
    by the function C{parse_function}.

    @param name: The name of the markup language.  C{name} should be a
        simple identifier, such as C{'epytext'} or C{'restructuredtext'}.
        Markup language names are case insensitive.

    @param parse_function: A function which can be used to parse the
        markup language, and returns a L{ParsedDocstring}.  It should
        have the following signature:

            >>> def parse(s, errors):
            ...     'returns a ParsedDocstring'

        Where:
            - C{s} is the string to parse.  (C{s} will be a unicode
              string.)
            - C{errors} is a list; any errors that are generated
              during docstring parsing should be appended to this
              list (as L{ParseError} objects).
    """
    # Names are stored lowercased, so lookups are case insensitive.
    _markup_language_registry[name.lower()] = parse_function
111
# Names of the markup languages that parse() has dispatched to so far.
MARKUP_LANGUAGES_USED = set()
113
def _parse_as_plaintext(docstring, errors, options):
    """
    Parse C{docstring} with the plaintext markup parser.  This is the
    shared fallback for every failure path in L{parse()}.

    @param docstring: The docstring to encode.
    @param errors: The list that parse errors are appended to.
    @param options: A dictionary of extra keyword options; forwarded
        unchanged to the plaintext parser.
    @return: Whatever C{epydoc.markup.plaintext.parse_docstring}
        returns for C{docstring}.
    """
    # Imported lazily, exactly as each inline fallback used to do.
    import epydoc.markup.plaintext as plaintext
    return plaintext.parse_docstring(docstring, errors, **options)


def _import_parse_function(module_name):
    """
    Import the module named C{module_name} and return its
    C{parse_docstring} attribute.

    @param module_name: The dotted name of a markup parser module.
    @return: The module's C{parse_docstring} function.
    @raise ImportError: If the module cannot be imported, or if it
        does not define C{parse_docstring}.
    """
    # A non-empty fromlist makes __import__ return the named submodule
    # itself, rather than the top-level package.
    module = __import__(module_name, {}, {}, ['parse_docstring'])
    try:
        return module.parse_docstring
    except AttributeError:
        # "from module import name" reports a missing name as an
        # ImportError; keep that contract for the caller.
        raise ImportError('cannot import name parse_docstring from %s'
                          % module_name)


def parse(docstring, markup='plaintext', errors=None, **options):
    """
    Parse the given docstring, and use it to construct a
    C{ParsedDocstring}.  If any fatal C{ParseError}s are encountered
    while parsing the docstring, then the docstring will be rendered
    as plaintext, instead.

    @type docstring: C{string}
    @param docstring: The docstring to encode.
    @type markup: C{string}
    @param markup: The name of the markup language that is used by
        the docstring.  If the markup language is not supported, then
        the docstring will be treated as plaintext.  The markup name
        is case-insensitive.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra keyword options, passed through to the
        markup language's parse function.
    @rtype: L{ParsedDocstring}
    @return: A L{ParsedDocstring} that encodes the contents of
        C{docstring}.
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Without a caller-supplied list, fatal errors are raised rather
    # than collected.
    raise_on_error = (errors is None)
    if errors is None:
        errors = []

    # Normalize the markup language name.
    markup = markup.lower()

    # Is the markup language name well formed?  The pattern is anchored
    # so that the *whole* name must consist of word characters; an
    # unanchored match would accept any name that merely starts with one.
    if not re.match(r'\w+\Z', markup):
        _parse_warn('Bad markup language name %r. Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, options)

    # Is the markup language supported?
    if markup not in _markup_language_registry:
        _parse_warn('Unsupported markup language %r. Treating '
                    'docstrings as plaintext.' % markup)
        return _parse_as_plaintext(docstring, errors, options)

    # Get the parse function.
    parse_docstring = _markup_language_registry[markup]

    # If the registry still holds a module name, import the parse
    # function from that module and cache it for later calls.
    if isinstance(parse_docstring, basestring):
        module_name = parse_docstring
        try:
            parse_docstring = _import_parse_function(module_name)
        except ImportError:
            # sys.exc_info() is used instead of "except ... as e" /
            # "except ..., e" so this parses on old and new Pythons.
            e = sys.exc_info()[1]
            _parse_warn('Error importing %s for markup language %s: %s' %
                        (module_name, markup, e))
            return _parse_as_plaintext(docstring, errors, options)
        _markup_language_registry[markup] = parse_docstring

    # Keep track of which markup languages have been used so far.
    MARKUP_LANGUAGES_USED.add(markup)

    # Parse the docstring.
    try:
        parsed_docstring = parse_docstring(docstring, errors, **options)
    except KeyboardInterrupt:
        raise
    except Exception:
        # In debug mode, let the parser's own failure propagate.
        if epydoc.DEBUG:
            raise
        e = sys.exc_info()[1]
        log.error('Internal error while parsing a docstring: %s; '
                  'treating docstring as plaintext' % e)
        return _parse_as_plaintext(docstring, errors, options)

    # Check for fatal errors.
    fatal_errors = [e for e in errors if e.is_fatal()]
    if fatal_errors:
        if raise_on_error:
            raise fatal_errors[0]
        return _parse_as_plaintext(docstring, errors, options)

    return parsed_docstring
194
195
196 _parse_warnings = {}
206
207
208
209
# NOTE(review): the ``class`` line and several ``def`` lines were
# missing from this listing.  ``split_fields``, ``summary`` and
# ``index_terms`` are named in the class docstring; ``concatenate`` is
# inferred from its docstring -- confirm against callers.
class ParsedDocstring:
    """
    A standard intermediate representation for parsed docstrings that
    can be used to generate output.  Parsed docstrings are produced by
    markup parsers (such as L{epytext.parse} or L{javadoc.parse}).
    C{ParsedDocstring}s support several kinds of operation:
      - output generation (L{to_plaintext()}, L{to_html()}, and
        L{to_latex()}).
      - Summarization (L{summary()}).
      - Field extraction (L{split_fields()}).
      - Index term extraction (L{index_terms()}).

    The output generation methods (C{to_M{format}()}) use a
    L{DocstringLinker} to link the docstring output with the rest
    of the documentation that epydoc generates.

    Subclassing
    ===========
    The only method that a subclass is I{required} to implement is
    L{to_plaintext()}; but it is often useful to override the other
    methods.  The default behavior of each method is described below:
      - C{to_I{format}}: Calls C{to_plaintext}, and uses the string it
        returns to generate verbatim output.
      - C{summary}: Returns C{self} (i.e., the entire docstring).
      - C{split_fields}: Returns C{(self, [])} (i.e., extracts no
        fields).
      - C{index_terms}: Returns C{[]} (i.e., extracts no index terms).

    If and when epydoc adds more output formats, new C{to_I{format}}
    methods will be added to this base class; but they will always
    be given a default implementation.
    """
    def split_fields(self, errors=None):
        """
        Split this docstring into its body and its fields.

        @return: A tuple C{(M{body}, M{fields})}, where C{M{body}} is
            the main body of this docstring, and C{M{fields}} is a list
            of its fields.
        @rtype: C{(L{ParsedDocstring}, list of L{Field})}
        @param errors: A list where any errors generated during
            splitting will be stored.  If no list is specified, then
            errors will be ignored.
        @type errors: C{list} of L{ParseError}
        """
        # Default behavior: the whole docstring is body; no fields.
        return self, []

    def summary(self):
        """
        @return: A short summary of this docstring.  Typically, the
            summary consists of the first sentence of the docstring.
        @rtype: L{ParsedDocstring}
        """
        # Default behavior: the whole docstring is its own summary.
        return self

    def concatenate(self, other):
        """
        @return: A new parsed docstring containing the concatenation
            of this docstring and C{other}.
        @raise ValueError: If the two parsed docstrings are
            incompatible.
        """
        return ConcatenatedDocstring(self, other)

    def to_html(self, docstring_linker, **options):
        """
        Translate this docstring to HTML.

        @param docstring_linker: An HTML translator for crossreference
            links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: An HTML fragment that encodes this docstring.
        @rtype: C{string}
        """
        # Default behavior: render the plaintext form verbatim.
        plaintext = plaintext_to_html(self.to_plaintext(docstring_linker))
        return '<pre class="literalblock">\n%s\n</pre>\n' % plaintext

    def to_latex(self, docstring_linker, **options):
        """
        Translate this docstring to LaTeX.

        @param docstring_linker: A LaTeX translator for crossreference
            links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: A LaTeX fragment that encodes this docstring.
        @rtype: C{string}
        """
        # Default behavior: render the plaintext form verbatim.
        plaintext = plaintext_to_latex(self.to_plaintext(docstring_linker))
        return '\\begin{alltt}\n%s\\end{alltt}\n\n' % plaintext

    def to_plaintext(self, docstring_linker, **options):
        """
        Translate this docstring to plaintext.

        @param docstring_linker: A plaintext translator for
            crossreference links into and out of the docstring.
        @type docstring_linker: L{DocstringLinker}
        @param options: Any extra options for the output.  Unknown
            options are ignored.
        @return: A plaintext fragment that encodes this docstring.
        @rtype: C{string}
        @raise NotImplementedError: Always; subclasses must override
            this method.
        """
        raise NotImplementedError('ParsedDocstring.to_plaintext()')

    def index_terms(self):
        """
        @return: The list of index terms that are defined in this
            docstring.  Each of these items will be added to the index
            page of the documentation.
        @rtype: C{list} of C{ParsedDocstring}
        """
        # Default behavior: no index terms.
        return []
333
334
335
336
# NOTE(review): the ``class`` line and the ``split_fields``, ``summary``
# and ``index_terms`` headers were missing from this listing; the name
# comes from the references elsewhere in this module, and no base class
# is assumed -- confirm against the original source.
class ConcatenatedDocstring:
    """
    A parsed docstring made by joining several parsed docstrings end
    to end.  Each operation is delegated to the parts, in order, and
    the results are combined.
    """
    def __init__(self, *parsed_docstrings):
        """
        @param parsed_docstrings: The parsed docstrings to join, in
            output order.
        """
        self._parsed_docstrings = parsed_docstrings

    def split_fields(self, errors=None):
        """
        Split every part into body and fields.

        @param errors: A list where any errors generated during
            splitting will be stored; forwarded to each part so that
            their errors are not silently dropped.
        @return: A tuple C{(M{body}, M{fields})}: the concatenation of
            the parts' bodies, and the parts' fields in order.
        """
        bodies = []
        fields = []
        for doc in self._parsed_docstrings:
            body, doc_fields = doc.split_fields(errors)
            bodies.append(body)
            fields.extend(doc_fields)
        return ConcatenatedDocstring(*bodies), fields

    def summary(self):
        """
        @return: The summary of the first part; or C{self} if this
            concatenation has no parts (there is nothing to summarize).
        """
        if not self._parsed_docstrings:
            return self
        return self._parsed_docstrings[0].summary()

    def to_html(self, docstring_linker, **options):
        """
        @return: The parts' HTML renderings, joined in order.
        """
        return ''.join([doc.to_html(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def to_latex(self, docstring_linker, **options):
        """
        @return: The parts' LaTeX renderings, joined in order.
        """
        return ''.join([doc.to_latex(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def to_plaintext(self, docstring_linker, **options):
        """
        @return: The parts' plaintext renderings, joined in order.
        """
        return ''.join([doc.to_plaintext(docstring_linker, **options)
                        for doc in self._parsed_docstrings])

    def index_terms(self):
        """
        @return: The index terms of every part, in order.
        """
        terms = []
        for doc in self._parsed_docstrings:
            terms += doc.index_terms()
        return terms
377
378
379
380
# NOTE(review): the ``class`` line and every ``def`` line were missing
# from this listing.  The class name comes from the module docstring;
# the accessor names (tag/arg/body) and the constructor's parameter
# order are inferred from the attributes -- confirm against callers.
class Field:
    """
    The contents of a docstring's field.  Docstring fields are used
    to describe specific aspects of an object, such as a parameter of
    a function or the author of a module.  Each field consists of a
    tag, an optional argument, and a body:
      - The tag specifies the type of information that the field
        encodes.
      - The argument specifies the object that the field describes.
        The argument may be C{None} or a C{string}.
      - The body contains the field's information.

    Tags are automatically downcased and stripped; and arguments are
    automatically stripped.
    """
    def __init__(self, tag, arg, body):
        """
        @param tag: The field's tag; stored downcased and stripped.
        @param arg: The field's argument, or C{None}; stored stripped.
        @param body: The field's body; stored unchanged.
        """
        self._tag = tag.lower().strip()
        if arg is None:
            self._arg = None
        else:
            self._arg = arg.strip()
        self._body = body

    def tag(self):
        """
        @return: This field's tag.
        @rtype: C{string}
        """
        return self._tag

    def arg(self):
        """
        @return: This field's argument, or C{None} if this field has
            no argument.
        @rtype: C{string} or C{None}
        """
        return self._arg

    def body(self):
        """
        @return: This field's body.
        @rtype: L{ParsedDocstring}
        """
        return self._body

    def __repr__(self):
        """
        @return: A short description of this field, showing its tag
            and (if it has one) its argument.
        @rtype: C{string}
        """
        if self._arg is None:
            return '<Field @%s: ...>' % self._tag
        else:
            return '<Field @%s %s: ...>' % (self._tag, self._arg)
429
430
431
432
# NOTE(review): the ``class`` and ``def`` lines were missing from this
# listing; the class and method names are those given in the module
# docstring, and ``label`` is assumed to default to C{None} because its
# documented type is "C{string} or C{None}" -- confirm against callers.
class DocstringLinker:
    """
    A translator for crossreference links into and out of a
    C{ParsedDocstring}.  C{DocstringLinker} is used by
    C{ParsedDocstring} to convert these crossreference links into
    appropriate output formats.  For example,
    C{ParsedDocstring.to_html} expects a C{DocstringLinker} that
    converts crossreference links to HTML.
    """
    def translate_indexterm(self, indexterm):
        """
        Translate an index term to the appropriate output format.  The
        output will typically include a crossreference anchor.

        @type indexterm: L{ParsedDocstring}
        @param indexterm: The index term to translate.
        @rtype: C{string}
        @return: The translated index term.
        @raise NotImplementedError: Always; subclasses must override
            this method.
        """
        raise NotImplementedError('DocstringLinker.translate_indexterm()')

    def translate_identifier_xref(self, identifier, label=None):
        """
        Translate a crossreference link to a Python identifier to the
        appropriate output format.  The output will typically include
        a reference or pointer to the crossreference target.

        @type identifier: C{string}
        @param identifier: The name of the Python identifier that
            should be linked to.
        @type label: C{string} or C{None}
        @param label: The label that should be used for the identifier,
            if it's different from the name of the identifier.
        @rtype: C{string}
        @return: The translated crossreference link.
        @raise NotImplementedError: Always; subclasses must override
            this method.
        """
        # The message names this method; it previously named a
        # non-existent ``translate_xref()``.
        raise NotImplementedError(
            'DocstringLinker.translate_identifier_xref()')
# NOTE(review): the ``class`` line and most ``def`` lines were missing
# from this listing.  ``is_fatal`` and ``descr`` are called elsewhere in
# this module and ``linenum`` is named in the comparison docstring;
# ``set_linenum_offset`` is inferred -- confirm against callers.
class ParseError(Exception):
    """
    The base class for errors generated while parsing docstrings.

    @ivar _linenum: The line on which the error occurred within the
        docstring.  The linenum of the first line is 0.
    @type _linenum: C{int}
    @ivar _offset: The line number where the docstring begins.  This
        offset is added to C{_linenum} when displaying the line number
        of the error.  Default value: 1.
    @type _offset: C{int}
    @ivar _descr: A description of the error.
    @type _descr: C{string}
    @ivar _fatal: True if this is a fatal error.
    @type _fatal: C{boolean}
    """
    def __init__(self, descr, linenum=None, is_fatal=1):
        """
        @type descr: C{string}
        @param descr: A description of the error.
        @type linenum: C{int}
        @param linenum: The line on which the error occurred within
            the docstring.  The linenum of the first line is 0.
        @type is_fatal: C{boolean}
        @param is_fatal: True if this is a fatal error.
        """
        self._descr = descr
        self._linenum = linenum
        self._fatal = is_fatal
        self._offset = 1

    def is_fatal(self):
        """
        @return: true if this is a fatal error.  If an error is fatal,
            then epydoc should ignore the output of the parser, and
            parse the docstring as plaintext.
        @rtype: C{boolean}
        """
        return self._fatal

    def linenum(self):
        """
        @return: The line number on which the error occurred (including
            any offset).  If the line number is unknown, then return
            C{None}.
        @rtype: C{int} or C{None}
        """
        if self._linenum is None:
            return None
        else:
            return self._offset + self._linenum

    def set_linenum_offset(self, offset):
        """
        Set the line number offset for this error.  This offset is the
        line number where the docstring begins.  This offset is added
        to C{_linenum} when displaying the line number of the error.

        @param offset: The new line number offset.
        @type offset: C{int}
        @rtype: C{None}
        """
        self._offset = offset

    def descr(self):
        """
        @return: A description of the error.
        @rtype: C{string}
        """
        return self._descr

    def __str__(self):
        """
        Return a string representation of this C{ParseError}.  This
        multi-line string contains a description of the error, and
        specifies where it occurred.

        @return: the informal representation of this C{ParseError}.
        @rtype: C{string}
        """
        if self._linenum is not None:
            return 'Line %s: %s' % (self._linenum+self._offset, self.descr())
        else:
            return self.descr()

    def __repr__(self):
        """
        Return the formal representation of this C{ParseError}.
        C{ParseError}s have formal representations of the form::
           <ParseError on line 12>

        If the line within the docstring is unknown, the line where
        the docstring begins is reported instead.

        @return: the formal representation of this C{ParseError}.
        @rtype: C{string}
        """
        return '<ParseError on line %d>' % self._sort_line()

    def _sort_line(self):
        """
        @return: The line used to locate and order this error: the
            offset-adjusted line number; or, if the line within the
            docstring is unknown, the line where the docstring begins.
        @rtype: C{int}
        """
        if self._linenum is None:
            return self._offset
        return self._linenum + self._offset

    def __cmp__(self, other):
        """
        Compare two C{ParseError}s, based on their line number.
          - Return -1 if C{self.linenum<other.linenum}
          - Return +1 if C{self.linenum>other.linenum}
          - Return 0 if C{self.linenum==other.linenum}.
        An error whose line is unknown is ordered by the line where
        its docstring begins.  The return value is undefined if
        C{other} is not a ParseError.

        @rtype: C{int}
        """
        if not isinstance(other, ParseError):
            return -1000
        mine = self._sort_line()
        theirs = other._sort_line()
        # Equivalent to cmp(mine, theirs), without needing the builtin.
        return (mine > theirs) - (mine < theirs)