1
2
3
4
5
6
7
8
9 """
10 Parser for epytext strings. Epytext is a lightweight markup whose
11 primary intended application is Python documentation strings. This
12 parser converts Epytext strings to an XML/DOM representation. Epytext
13 strings can contain the following X{structural blocks}:
14
15 - X{epytext}: The top-level element of the DOM tree.
16 - X{para}: A paragraph of text. Paragraphs contain no newlines,
17 and all spaces are soft.
18 - X{section}: A section or subsection.
19 - X{field}: A tagged field. These fields provide information
20 about specific aspects of a Python object, such as the
21 description of a function's parameter, or the author of a
22 module.
23 - X{literalblock}: A block of literal text. This text should be
24 displayed as it would be displayed in plaintext. The
25 parser removes the appropriate amount of leading whitespace
26 from each line in the literal block.
27 - X{doctestblock}: A block containing sample python code,
28 formatted according to the specifications of the C{doctest}
29 module.
30 - X{ulist}: An unordered list.
31 - X{olist}: An ordered list.
32 - X{li}: A list item. This tag is used both for unordered list
33 items and for ordered list items.
34
35 Additionally, the following X{inline regions} may be used within
36 C{para} blocks:
37
38 - X{code}: Source code and identifiers.
39 - X{math}: Mathematical expressions.
40 - X{index}: A term which should be included in an index, if one
41 is generated.
42 - X{italic}: Italicized text.
43 - X{bold}: Bold-faced text.
44 - X{uri}: A Uniform Resource Identifier (URI) or Uniform
45 Resource Locator (URL).
46 - X{link}: A Python identifier which should be hyperlinked to
47 the named object's documentation, when possible.
48
49 The returned DOM tree will conform to the following Document Type
50 Description::
51
52 <!ENTITY % colorized '(code | math | index | italic |
53 bold | uri | link | symbol)*'>
54
55 <!ELEMENT epytext ((para | literalblock | doctestblock |
56 section | ulist | olist)*, fieldlist?)>
57
58 <!ELEMENT para (#PCDATA | %colorized;)*>
59
60 <!ELEMENT section (para | literalblock | doctestblock |
61 section | ulist | olist)+>
62
63 <!ELEMENT fieldlist (field+)>
64 <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock |
65 ulist | olist)+)>
66 <!ELEMENT tag (#PCDATA)>
67 <!ELEMENT arg (#PCDATA)>
68
69 <!ELEMENT literalblock (#PCDATA)>
70 <!ELEMENT doctestblock (#PCDATA)>
71
72 <!ELEMENT ulist (li+)>
73 <!ELEMENT olist (li+)>
74 <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+>
75 <!ATTLIST li bullet NMTOKEN #IMPLIED>
76 <!ATTLIST olist start NMTOKEN #IMPLIED>
77
78 <!ELEMENT uri (name, target)>
79 <!ELEMENT link (name, target)>
80 <!ELEMENT name (#PCDATA | %colorized;)*>
81 <!ELEMENT target (#PCDATA)>
82
83 <!ELEMENT code (#PCDATA | %colorized;)*>
84 <!ELEMENT math (#PCDATA | %colorized;)*>
85 <!ELEMENT italic (#PCDATA | %colorized;)*>
86 <!ELEMENT bold (#PCDATA | %colorized;)*>
87 <!ELEMENT indexed (#PCDATA | %colorized;)*>
88
89 <!ELEMENT symbol (#PCDATA)>
90
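A small usage example (an illustrative sketch; it assumes this module is
importable as C{epydoc.markup.epytext}):

    >>> from epydoc.markup.epytext import parse
    >>> tree = parse('This is B{bold} text.')
    >>> tree.childNodes[0].tagName
    'epytext'
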
91 @var SYMBOLS: A list of the escape symbols that are supported
92 by epydoc. Currently the following symbols are supported:
93 <<<SYMBOLS>>>
94 """
95
96
97
98 __docformat__ = 'epytext en'
99
100
101
102
103
104
105
106
107 import re, string, types, sys, os.path
108 from xml.dom.minidom import Document, Text
109 import xml.dom.minidom
110 from epydoc.markup import *
111 from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex
112 from epydoc.docwriter.html_colorize import colorize_doctestblock
113
114
115
116
117
118
119
120 _HEADING_CHARS = "=-~"
121
122
123 _ESCAPES = {'lb':'{', 'rb': '}'}
124
125
126 SYMBOLS = [
127
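# Arrows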
128 '<-', '->', '^', 'v',
129
130
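# Greek letters (lowercase and uppercase)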
131 'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
132 'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
133 'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
134 'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
135 'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
136 'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
137 'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
138 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',
139
140
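# HTML character entities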
141 'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
142 'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
143 'copy', 'times', 'forall', 'exist', 'part',
144 'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
145 'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
146 'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
147 'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
148 'sube', 'supe', 'oplus', 'otimes', 'perp',
149
150
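# Alternate (spelled-out) names for some of the symbols above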
151 'infinity', 'integral', 'product',
152 '>=', '<=',
153 ]
154
155 _SYMBOLS = {}
156 for symbol in SYMBOLS: _SYMBOLS[symbol] = 1
157
158
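# Substitute the list of supported symbols into the module docstring,
# replacing the <<<SYMBOLS>>> placeholder.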
159 symblist = ' '
160 symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
161 for symbol in SYMBOLS])
162 __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
163 del symbol, symblist
164
165
166 _COLORIZING_TAGS = {
167 'C': 'code',
168 'M': 'math',
169 'X': 'indexed',
170 'I': 'italic',
171 'B': 'bold',
172 'U': 'uri',
173 'L': 'link',
174 'E': 'escape',
175 'S': 'symbol',
176 'G': 'graph',
177 }
178
179
180 _LINK_COLORIZING_TAGS = ['link', 'uri']
181
182
183
184
185
186 def parse(str, errors = None):
187 """
188 Return a DOM tree encoding the contents of an epytext string. Any
189 errors generated during parsing will be stored in C{errors}.
190
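A minimal illustrative example (a sketch; the DOM contents are abbreviated):

    >>> errors = []
    >>> tree = parse('A simple paragraph.', errors)
    >>> print tree.childNodes[0].childNodes[0].tagName
    para
    >>> errors
    []
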
191 @param str: The epytext string to parse.
192 @type str: C{string}
193 @param errors: A list where any errors generated during parsing
194 will be stored. If no list is specified, then fatal errors
195 will generate exceptions, and non-fatal errors will be
196 ignored.
197 @type errors: C{list} of L{ParseError}
198 @return: a DOM tree encoding the contents of an epytext string.
199 @rtype: L{xml.dom.minidom.Document}
200 @raise ParseError: If C{errors} is C{None} and an error is
201 encountered while parsing.
202 """
203
204 if errors == None:
205 errors = []
206 raise_on_error = 1
207 else:
208 raise_on_error = 0
209
210
211 str = re.sub('\015\012', '\012', str)
212 str = string.expandtabs(str)
213
214
215 tokens = _tokenize(str, errors)
216
217
218 encountered_field = 0
219
220
221 doc = Document()
222
223
224
225
226
227
228
229
230
231
232
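# stack[i] is the i-th open block element of the DOM tree under
# construction, and indent_stack[i] is its indentation (None = not yet
# known).  Index 0 of each list holds a sentinel value.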
233 stack = [None, doc.createElement('epytext')]
234 indent_stack = [-1, None]
235
236 for token in tokens:
237
238
239
240
241
242
243
244 _pop_completed_blocks(token, stack, indent_stack)
245
246
247 if token.tag == Token.PARA:
248 _add_para(doc, token, stack, indent_stack, errors)
249
250
251 elif token.tag == Token.HEADING:
252 _add_section(doc, token, stack, indent_stack, errors)
253
254
255 elif token.tag == Token.LBLOCK:
256 stack[-1].appendChild(token.to_dom(doc))
257
258
259 elif token.tag == Token.DTBLOCK:
260 stack[-1].appendChild(token.to_dom(doc))
261
262
263 elif token.tag == Token.BULLET:
264 _add_list(doc, token, stack, indent_stack, errors)
265 else:
266 assert 0, 'Unknown token type: '+token.tag
267
268
269 if stack[-1].tagName == 'field':
270 encountered_field = 1
271 elif encountered_field == 1:
272 if len(stack) <= 3:
273 estr = ("Fields must be the final elements in an "+
274 "epytext string.")
275 errors.append(StructuringError(estr, token.startline))
276
277
278 if len([e for e in errors if e.is_fatal()]) > 0:
279 if raise_on_error:
280 raise errors[0]
281 else:
282 return None
283
284
285 doc.appendChild(stack[1])
286 return doc
287
289 """
290 Pop any completed blocks off the stack. This includes any
291 blocks that we have dedented past, as well as any list item
292 blocks that we've dedented to. The top element on the stack
293 should only be a list if we're about to start a new list
294 item (i.e., if the next token is a bullet).
295 """
296 indent = token.indent
297 if indent != None:
298 while (len(stack) > 2):
299 pop = 0
300
301
302 if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1
303 elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1
304
305
306
307 elif (token.tag == 'bullet' and indent==indent_stack[-2] and
308 stack[-1].tagName in ('li', 'field')): pop=1
309
310
311 elif (stack[-1].tagName in ('ulist', 'olist') and
312 (token.tag != 'bullet' or token.contents[-1] == ':')):
313 pop=1
314
315
316 if pop == 0: return
317 stack.pop()
318 indent_stack.pop()
319
320 def _add_para(doc, para_token, stack, indent_stack, errors):
321 """Colorize the given paragraph, and add it to the DOM tree."""
322
323
324 if indent_stack[-1] == None:
325 indent_stack[-1] = para_token.indent
326 if para_token.indent == indent_stack[-1]:
327
328 para = _colorize(doc, para_token, errors)
329 stack[-1].appendChild(para)
330 else:
331 estr = "Improper paragraph indentation."
332 errors.append(StructuringError(estr, para_token.startline))
333
334 def _add_section(doc, heading_token, stack, indent_stack, errors):
335 """Add a new section to the DOM tree, with the given heading."""
336 if indent_stack[-1] == None:
337 indent_stack[-1] = heading_token.indent
338 elif indent_stack[-1] != heading_token.indent:
339 estr = "Improper heading indentation."
340 errors.append(StructuringError(estr, heading_token.startline))
341
342
343 for tok in stack[2:]:
344 if tok.tagName != "section":
345 estr = "Headings must occur at the top level."
346 errors.append(StructuringError(estr, heading_token.startline))
347 break
348 if (heading_token.level+2) > len(stack):
349 estr = "Wrong underline character for heading."
350 errors.append(StructuringError(estr, heading_token.startline))
351
352
353
354 stack[heading_token.level+2:] = []
355 indent_stack[heading_token.level+2:] = []
356
357
358 head = _colorize(doc, heading_token, errors, 'heading')
359
360
361 sec = doc.createElement("section")
362 stack[-1].appendChild(sec)
363 stack.append(sec)
364 sec.appendChild(head)
365 indent_stack.append(None)
366
367 def _add_list(doc, bullet_token, stack, indent_stack, errors):
368 """
369 Add a new list item or field to the DOM tree, with the given
370 bullet or field tag. When necessary, create the associated
371 list.
372 """
373
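# Determine the type of the new list from the bullet's final character:
# '-' for an unordered list, '.' for an ordered list (e.g. '1.'), and
# ':' for a field tag (e.g. '@param x:').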
374 if bullet_token.contents[-1] == '-':
375 list_type = 'ulist'
376 elif bullet_token.contents[-1] == '.':
377 list_type = 'olist'
378 elif bullet_token.contents[-1] == ':':
379 list_type = 'fieldlist'
380 else:
381 raise AssertionError('Bad Bullet: %r' % bullet_token.contents)
382
383
384 newlist = 0
385 if stack[-1].tagName != list_type:
386 newlist = 1
387 elif list_type == 'olist' and stack[-1].tagName == 'olist':
388 old_listitem = stack[-1].childNodes[-1]
389 old_bullet = old_listitem.getAttribute("bullet").split('.')[:-1]
390 new_bullet = bullet_token.contents.split('.')[:-1]
391 if (new_bullet[:-1] != old_bullet[:-1] or
392 int(new_bullet[-1]) != int(old_bullet[-1])+1):
393 newlist = 1
394
395
396 if newlist:
397 if stack[-1].tagName == 'fieldlist':
398
399
400
401
402
403
404 estr = "Lists must be indented."
405 errors.append(StructuringError(estr, bullet_token.startline))
406 if stack[-1].tagName in ('ulist', 'olist', 'fieldlist'):
407 stack.pop()
408 indent_stack.pop()
409
410 if (list_type != 'fieldlist' and indent_stack[-1] is not None and
411 bullet_token.indent == indent_stack[-1]):
412
413
414
415 if bullet_token.startline != 1 or bullet_token.indent != 0:
416 estr = "Lists must be indented."
417 errors.append(StructuringError(estr, bullet_token.startline))
418
419 if list_type == 'fieldlist':
420
421 for tok in stack[2:]:
422 if tok.tagName != "section":
423 estr = "Fields must be at the top level."
424 errors.append(
425 StructuringError(estr, bullet_token.startline))
426 break
427 stack[2:] = []
428 indent_stack[2:] = []
429
430
431 lst = doc.createElement(list_type)
432 stack[-1].appendChild(lst)
433 stack.append(lst)
434 indent_stack.append(bullet_token.indent)
435 if list_type == 'olist':
436 start = bullet_token.contents.split('.')[:-1]
437 if start[-1] != '1':
438 lst.setAttribute("start", start[-1])
439
440
441
442
443
444 if list_type == 'fieldlist':
445 li = doc.createElement("field")
446 token_words = bullet_token.contents[1:-1].split(None, 1)
447 tag_elt = doc.createElement("tag")
448 tag_elt.appendChild(doc.createTextNode(token_words[0]))
449 li.appendChild(tag_elt)
450
451 if len(token_words) > 1:
452 arg_elt = doc.createElement("arg")
453 arg_elt.appendChild(doc.createTextNode(token_words[1]))
454 li.appendChild(arg_elt)
455 else:
456 li = doc.createElement("li")
457 if list_type == 'olist':
458 li.setAttribute("bullet", bullet_token.contents)
459
460
461 stack[-1].appendChild(li)
462 stack.append(li)
463 indent_stack.append(None)
464
465
466
467
468
469
471 """
472 C{Token}s are an intermediate data structure used while
473 constructing the structuring DOM tree for a formatted docstring.
474 There are five types of C{Token}:
475
476 - Paragraphs
477 - Literal blocks
478 - Doctest blocks
479 - Headings
480 - Bullets
481
482 The text contained in each C{Token} is stored in the
483 C{contents} variable. The string in this variable has been
484 normalized. For paragraphs, this means that it has been converted
485 into a single line of text, with newline/indentation replaced by
486 single spaces. For literal blocks and doctest blocks, this means
487 that the appropriate amount of leading whitespace has been removed
488 from each line.
489
490 Each C{Token} has an indentation level associated with it,
491 stored in the C{indent} variable. This indentation level is used
492 by the structuring procedure to assemble hierarchical blocks.
493
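For example (an illustrative sketch):

    >>> tok = Token(Token.PARA, 0, 'A single line of text.', 0)
    >>> print tok
    <Token: para at line 0>
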
494 @type tag: C{string}
495 @ivar tag: This C{Token}'s type. Possible values are C{Token.PARA}
496 (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK}
497 (doctest block), C{Token.HEADING} (heading), and C{Token.BULLET} (bullet).
498
499 @type startline: C{int}
500 @ivar startline: The line on which this C{Token} begins. This
501 line number is only used for issuing errors.
502
503 @type contents: C{string}
504 @ivar contents: The normalized text contained in this C{Token}.
505
506 @type indent: C{int} or C{None}
507 @ivar indent: The indentation level of this C{Token} (in
508 number of leading spaces). A value of C{None} indicates an
509 unknown indentation; this is used for list items and fields
510 that begin with one-line paragraphs.
511
512 @type level: C{int} or C{None}
513 @ivar level: The heading-level of this C{Token} if it is a
514 heading; C{None}, otherwise. Valid heading levels are 0, 1,
515 and 2.
516
517 @type PARA: C{string}
518 @cvar PARA: The C{tag} value for paragraph C{Token}s.
519 @type LBLOCK: C{string}
520 @cvar LBLOCK: The C{tag} value for literal C{Token}s.
521 @type DTBLOCK: C{string}
522 @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
523 @type HEADING: C{string}
524 @cvar HEADING: The C{tag} value for heading C{Token}s.
525 @type BULLET: C{string}
526 @cvar BULLET: The C{tag} value for bullet C{Token}s. This C{tag}
527 value is also used for field tag C{Token}s, since fields
528 function syntactically the same as list items.
529 """
530
531 PARA = "para"
532 LBLOCK = "literalblock"
533 DTBLOCK = "doctestblock"
534 HEADING = "heading"
535 BULLET = "bullet"
536
537 def __init__(self, tag, startline, contents, indent, level=None):
538 """
539 Create a new C{Token}.
540
541 @param tag: The type of the new C{Token}.
542 @type tag: C{string}
543 @param startline: The line on which the new C{Token} begins.
544 @type startline: C{int}
545 @param contents: The normalized contents of the new C{Token}.
546 @type contents: C{string}
547 @param indent: The indentation of the new C{Token} (in number
548 of leading spaces). A value of C{None} indicates an
549 unknown indentation.
550 @type indent: C{int} or C{None}
551 @param level: The heading-level of this C{Token} if it is a
552 heading; C{None}, otherwise.
553 @type level: C{int} or C{None}
554 """
555 self.tag = tag
556 self.startline = startline
557 self.contents = contents
558 self.indent = indent
559 self.level = level
560
562 """
563 @rtype: C{string}
564 @return: the formal representation of this C{Token}.
565 C{Token}s have formal representations of the form::
566 <Token: para at line 12>
567 """
568 return '<Token: %s at line %s>' % (self.tag, self.startline)
569
571 """
572 @return: a DOM representation of this C{Token}.
573 @rtype: L{xml.dom.minidom.Element}
574 """
575 e = doc.createElement(self.tag)
576 e.appendChild(doc.createTextNode(self.contents))
577 return e
578
579
580
581
582 _ULIST_BULLET = '[-]( +|$)'
583 _OLIST_BULLET = '(\d+[.])+( +|$)'
584 _FIELD_BULLET = '@\w+( [^{}:\n]+)?:( +|$)'
585 _BULLET_RE = re.compile(_ULIST_BULLET + '|' +
586 _OLIST_BULLET + '|' +
587 _FIELD_BULLET)
588 _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET)
589 _FIELD_BULLET_RE = re.compile(_FIELD_BULLET)
590 del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET
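# For example (illustrative): _BULLET_RE matches '- ', '1.2. ', and
# '@param x: ' at the start of a list item or field.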
591
593 """
594 Construct a L{Token} containing the doctest block starting at
595 C{lines[start]}, and append it to C{tokens}. C{block_indent}
596 should be the indentation of the doctest block. Any errors
597 generated while tokenizing the doctest block will be appended to
598 C{errors}.
599
600 @param lines: The list of lines to be tokenized
601 @param start: The index into C{lines} of the first line of the
602 doctest block to be tokenized.
603 @param block_indent: The indentation of C{lines[start]}. This is
604 the indentation of the doctest block.
605 @param errors: A list where any errors generated during parsing
606 will be stored. If no list is specified, then errors will
607 generate exceptions.
608 @return: The line number of the first line following the doctest
609 block.
610
611 @type lines: C{list} of C{string}
612 @type start: C{int}
613 @type block_indent: C{int}
614 @type tokens: C{list} of L{Token}
615 @type errors: C{list} of L{ParseError}
616 @rtype: C{int}
617 """
618
619
620
621 min_indent = block_indent
622
623 linenum = start + 1
624 while linenum < len(lines):
625
626 line = lines[linenum]
627 indent = len(line) - len(line.lstrip())
628
629
630 if indent == len(line): break
631
632
633 if indent < block_indent:
634 min_indent = min(min_indent, indent)
635 estr = 'Improper doctest block indentation.'
636 errors.append(TokenizationError(estr, linenum))
637
638
639 linenum += 1
640
641
642 contents = [line[min_indent:] for line in lines[start:linenum]]
643 contents = '\n'.join(contents)
644 tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
645 return linenum
646
648 """
649 Construct a L{Token} containing the literal block starting at
650 C{lines[start]}, and append it to C{tokens}. C{block_indent}
651 should be the indentation of the literal block. Any errors
652 generated while tokenizing the literal block will be appended to
653 C{errors}.
654
655 @param lines: The list of lines to be tokenized
656 @param start: The index into C{lines} of the first line of the
657 literal block to be tokenized.
658 @param block_indent: The indentation of C{lines[start]}. This is
659 the indentation of the literal block.
660 @param errors: A list of the errors generated by parsing. Any
661 new errors generated while tokenizing this literal block
662 will be appended to this list.
663 @return: The line number of the first line following the literal
664 block.
665
666 @type lines: C{list} of C{string}
667 @type start: C{int}
668 @type block_indent: C{int}
669 @type tokens: C{list} of L{Token}
670 @type errors: C{list} of L{ParseError}
671 @rtype: C{int}
672 """
673 linenum = start + 1
674 while linenum < len(lines):
675
676 line = lines[linenum]
677 indent = len(line) - len(line.lstrip())
678
679
680
681 if len(line) != indent and indent <= block_indent:
682 break
683
684
685 linenum += 1
686
687
688 contents = [line[block_indent+1:] for line in lines[start:linenum]]
689 contents = '\n'.join(contents)
690 contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
691 tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
692 return linenum
693
695 """
696 Construct L{Token}s for the bullet and the first paragraph of the
697 list item (or field) starting at C{lines[start]}, and append them
698 to C{tokens}. C{bullet_indent} should be the indentation of the
699 list item. Any errors generated while tokenizing will be
700 appended to C{errors}.
701
702 @param lines: The list of lines to be tokenized
703 @param start: The index into C{lines} of the first line of the
704 list item to be tokenized.
705 @param bullet_indent: The indentation of C{lines[start]}. This is
706 the indentation of the list item.
707 @param errors: A list of the errors generated by parsing. Any
708 new errors generated while tokenizing this list item
709 will be appended to this list.
710 @return: The line number of the first line following the list
711 item's first paragraph.
712
713 @type lines: C{list} of C{string}
714 @type start: C{int}
715 @type bullet_indent: C{int}
716 @type tokens: C{list} of L{Token}
717 @type errors: C{list} of L{ParseError}
718 @rtype: C{int}
719 """
720 linenum = start + 1
721 para_indent = None
722 doublecolon = lines[start].rstrip()[-2:] == '::'
723
724
725 para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
726 bcontents = lines[start][bullet_indent:para_start].strip()
727
728 while linenum < len(lines):
729
730 line = lines[linenum]
731 indent = len(line) - len(line.lstrip())
732
733
734 if doublecolon: break
735 if line.rstrip()[-2:] == '::': doublecolon = 1
736
737
738 if indent == len(line): break
739
740
741 if indent < bullet_indent: break
742
743
744 if _BULLET_RE.match(line, indent): break
745
746
747
748 if para_indent == None: para_indent = indent
749
750
751 if indent != para_indent: break
752
753
754 linenum += 1
755
756
757 tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent))
758
759
760 pcontents = ([lines[start][para_start:].strip()] +
761 [line.strip() for line in lines[start+1:linenum]])
762 pcontents = ' '.join(pcontents).strip()
763 if pcontents:
764 tokens.append(Token(Token.PARA, start, pcontents, para_indent))
765
766
767 return linenum
768
770 """
771 Construct a L{Token} containing the paragraph starting at
772 C{lines[start]}, and append it to C{tokens}. C{para_indent}
773 should be the indentation of the paragraph. Any errors
774 generated while tokenizing the paragraph will be appended to
775 C{errors}.
776
777 @param lines: The list of lines to be tokenized
778 @param start: The index into C{lines} of the first line of the
779 paragraph to be tokenized.
780 @param para_indent: The indentation of C{lines[start]}. This is
781 the indentation of the paragraph.
782 @param errors: A list of the errors generated by parsing. Any
783 new errors generated while tokenizing this paragraph
784 will be appended to this list.
785 @return: The line number of the first line following the
786 paragraph.
787
788 @type lines: C{list} of C{string}
789 @type start: C{int}
790 @type para_indent: C{int}
791 @type tokens: C{list} of L{Token}
792 @type errors: C{list} of L{ParseError}
793 @rtype: C{int}
794 """
795 linenum = start + 1
796 doublecolon = 0
797 while linenum < len(lines):
798
799 line = lines[linenum]
800 indent = len(line) - len(line.lstrip())
801
802
803 if doublecolon: break
804 if line.rstrip()[-2:] == '::': doublecolon = 1
805
806
807 if indent == len(line): break
808
809
810 if indent != para_indent: break
811
812
813 if _BULLET_RE.match(line, indent): break
814
815
816 if line[indent] == '@':
817 estr = "Possible mal-formatted field item."
818 errors.append(TokenizationError(estr, linenum, is_fatal=0))
819
820
821 linenum += 1
822
823 contents = [line.strip() for line in lines[start:linenum]]
824
825
826 if ((len(contents) < 2) or
827 (contents[1][0] not in _HEADING_CHARS) or
828 (abs(len(contents[0])-len(contents[1])) > 5)):
829 looks_like_heading = 0
830 else:
831 looks_like_heading = 1
832 for char in contents[1]:
833 if char != contents[1][0]:
834 looks_like_heading = 0
835 break
836
837 if looks_like_heading:
838 if len(contents[0]) != len(contents[1]):
839 estr = ("Possible heading typo: the number of "+
840 "underline characters must match the "+
841 "number of heading characters.")
842 errors.append(TokenizationError(estr, start, is_fatal=0))
843 else:
844 level = _HEADING_CHARS.index(contents[1][0])
845 tokens.append(Token(Token.HEADING, start,
846 contents[0], para_indent, level))
847 return start+2
848
849
850 contents = ' '.join(contents)
851 tokens.append(Token(Token.PARA, start, contents, para_indent))
852 return linenum
853
855 """
856 Split a given formatted docstring into an ordered list of
857 C{Token}s, according to the epytext markup rules.
858
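For example (an illustrative sketch):

    >>> toks = _tokenize('A single paragraph of text.', [])
    >>> [t.tag for t in toks]
    ['para']
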
859 @param str: The epytext string
860 @type str: C{string}
861 @param errors: A list where any errors generated during parsing
862 will be stored. If no list is specified, then errors will
863 generate exceptions.
864 @type errors: C{list} of L{ParseError}
865 @return: a list of the C{Token}s that make up the given string.
866 @rtype: C{list} of L{Token}
867 """
868 tokens = []
869 lines = str.split('\n')
870
871
872
873 linenum = 0
874 while linenum < len(lines):
875
876 line = lines[linenum]
877 indent = len(line)-len(line.lstrip())
878
879 if indent == len(line):
880
881 linenum += 1
882 continue
883 elif line[indent:indent+4] == '>>> ':
884
885 linenum = _tokenize_doctest(lines, linenum, indent,
886 tokens, errors)
887 elif _BULLET_RE.match(line, indent):
888
889 linenum = _tokenize_listart(lines, linenum, indent,
890 tokens, errors)
891 if tokens[-1].indent != None:
892 indent = tokens[-1].indent
893 else:
894
895 if line[indent] == '@':
896 estr = "Possible mal-formatted field item."
897 errors.append(TokenizationError(estr, linenum, is_fatal=0))
898
899
900 linenum = _tokenize_para(lines, linenum, indent, tokens, errors)
901
902
903 if (tokens[-1].tag == Token.PARA and
904 tokens[-1].contents[-2:] == '::'):
905 tokens[-1].contents = tokens[-1].contents[:-1]
906 linenum = _tokenize_literal(lines, linenum, indent, tokens, errors)
907
908 return tokens
909
910
911
912
913
914
915
916 _BRACE_RE = re.compile('{|}')
917 _TARGET_RE = re.compile('^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
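# e.g. _TARGET_RE splits 'example site <http://www.example.com>' into the
# link name ('example site') and the target ('http://www.example.com').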
918
919 def _colorize(doc, token, errors, tagName='para'):
920 """
921 Given a string containing the contents of a paragraph, produce a
922 DOM C{Element} encoding that paragraph. Colorized regions are
923 represented using DOM C{Element}s, and text is represented using
924 DOM C{Text}s.
925
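For example (an illustrative sketch, shown via L{parse}):

    >>> para = parse('See the B{bold} word.').childNodes[0].childNodes[0]
    >>> [getattr(c, 'tagName', '#text') for c in para.childNodes]
    ['#text', 'bold', '#text']
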
926 @param errors: A list of errors. Any newly generated errors will
927 be appended to this list.
928 @type errors: C{list} of L{ParseError}
929
930 @param tagName: The element tag for the DOM C{Element} that should
931 be generated.
932 @type tagName: C{string}
933
934 @return: a DOM C{Element} encoding the given paragraph.
935 @returntype: C{Element}
936 """
937 str = token.contents
938 linenum = 0
939
940
941
942
943
944 stack = [doc.createElement(tagName)]
945
946
947
948
949 openbrace_stack = [0]
950
951
952
953
954
955 start = 0
956 while 1:
957 match = _BRACE_RE.search(str, start)
958 if match == None: break
959 end = match.start()
960
961
962
963
964
965
966
967 if match.group() == '{':
968 if (end>0) and 'A' <= str[end-1] <= 'Z':
969 if (end-1) > start:
970 stack[-1].appendChild(doc.createTextNode(str[start:end-1]))
971 if not _COLORIZING_TAGS.has_key(str[end-1]):
972 estr = "Unknown inline markup tag."
973 errors.append(ColorizingError(estr, token, end-1))
974 stack.append(doc.createElement('unknown'))
975 else:
976 tag = _COLORIZING_TAGS[str[end-1]]
977 stack.append(doc.createElement(tag))
978 else:
979 if end > start:
980 stack[-1].appendChild(doc.createTextNode(str[start:end]))
981 stack.append(doc.createElement('litbrace'))
982 openbrace_stack.append(end)
983 stack[-2].appendChild(stack[-1])
984
985
986 elif match.group() == '}':
987
988 if len(stack) <= 1:
989 estr = "Unbalanced '}'."
990 errors.append(ColorizingError(estr, token, end))
991 start = end + 1
992 continue
993
994
995 if end > start:
996 stack[-1].appendChild(doc.createTextNode(str[start:end]))
997
998
999 if stack[-1].tagName == 'symbol':
1000 if (len(stack[-1].childNodes) != 1 or
1001 not isinstance(stack[-1].childNodes[0], Text)):
1002 estr = "Invalid symbol code."
1003 errors.append(ColorizingError(estr, token, end))
1004 else:
1005 symb = stack[-1].childNodes[0].data
1006 if _SYMBOLS.has_key(symb):
1007
1008 symbol = doc.createElement('symbol')
1009 stack[-2].removeChild(stack[-1])
1010 stack[-2].appendChild(symbol)
1011 symbol.appendChild(doc.createTextNode(symb))
1012 else:
1013 estr = "Invalid symbol code."
1014 errors.append(ColorizingError(estr, token, end))
1015
1016
1017 if stack[-1].tagName == 'escape':
1018 if (len(stack[-1].childNodes) != 1 or
1019 not isinstance(stack[-1].childNodes[0], Text)):
1020 estr = "Invalid escape code."
1021 errors.append(ColorizingError(estr, token, end))
1022 else:
1023 escp = stack[-1].childNodes[0].data
1024 if _ESCAPES.has_key(escp):
1025
1026 stack[-2].removeChild(stack[-1])
1027 escp = _ESCAPES[escp]
1028 stack[-2].appendChild(doc.createTextNode(escp))
1029 elif len(escp) == 1:
1030
1031 stack[-2].removeChild(stack[-1])
1032 stack[-2].appendChild(doc.createTextNode(escp))
1033 else:
1034 estr = "Invalid escape code."
1035 errors.append(ColorizingError(estr, token, end))
1036
1037
1038 if stack[-1].tagName == 'litbrace':
1039 variables = stack[-1].childNodes
1040 stack[-2].removeChild(stack[-1])
1041 stack[-2].appendChild(doc.createTextNode('{'))
1042 for child in variables:
1043 stack[-2].appendChild(child)
1044 stack[-2].appendChild(doc.createTextNode('}'))
1045
1046
1047 if stack[-1].tagName == 'graph':
1048 _colorize_graph(doc, stack[-1], token, end, errors)
1049
1050
1051 if stack[-1].tagName in _LINK_COLORIZING_TAGS:
1052 _colorize_link(doc, stack[-1], token, end, errors)
1053
1054
1055 openbrace_stack.pop()
1056 stack.pop()
1057
1058 start = end+1
1059
1060
1061 if start < len(str):
1062 stack[-1].appendChild(doc.createTextNode(str[start:]))
1063
1064 if len(stack) != 1:
1065 estr = "Unbalanced '{'."
1066 errors.append(ColorizingError(estr, token, openbrace_stack[-1]))
1067
1068 return stack[0]
1069
1070 GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
1071
1073 """
1074 Eg::
1075 G{classtree}
1076 G{classtree x, y, z}
1077 G{importgraph}
1078 """
1079 bad_graph_spec = False
1080
1081 children = graph.childNodes[:]
1082 for child in children: graph.removeChild(child)
1083
1084 if len(children) != 1 or not isinstance(children[0], Text):
1085 bad_graph_spec = "Bad graph specification"
1086 else:
1087 pieces = children[0].data.split(None, 1)
1088 graphtype = pieces[0].replace(':','').strip().lower()
1089 if graphtype in GRAPH_TYPES:
1090 if len(pieces) == 2:
1091 if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]):
1092 args = pieces[1].replace(',', ' ').replace(':','').split()
1093 else:
1094 bad_graph_spec = "Bad graph arg list"
1095 else:
1096 args = []
1097 else:
1098 bad_graph_spec = ("Bad graph type %s -- use one of %s" %
1099 (pieces[0], ', '.join(GRAPH_TYPES)))
1100
1101 if bad_graph_spec:
1102 errors.append(ColorizingError(bad_graph_spec, token, end))
1103 graph.appendChild(doc.createTextNode('none'))
1104 graph.appendChild(doc.createTextNode(''))
1105 return
1106
1107 graph.appendChild(doc.createTextNode(graphtype))
1108 for arg in args:
1109 graph.appendChild(doc.createTextNode(arg))
1110
def _colorize_link(doc, link, token, end, errors):
1112 variables = link.childNodes[:]
1113
1114
1115 if len(variables)==0 or not isinstance(variables[-1], Text):
1116 estr = "Bad %s target." % link.tagName
1117 errors.append(ColorizingError(estr, token, end))
1118 return
1119
1120
1121 match2 = _TARGET_RE.match(variables[-1].data)
1122 if match2:
1123 (text, target) = match2.groups()
1124 variables[-1].data = text
1125
1126 elif len(variables) == 1:
1127 target = variables[0].data
1128 else:
1129 estr = "Bad %s target." % link.tagName
1130 errors.append(ColorizingError(estr, token, end))
1131 return
1132
1133
1134 name_elt = doc.createElement('name')
1135 for child in variables:
1136 name_elt.appendChild(link.removeChild(child))
1137
1138
1139
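# If a URI target has no scheme, guess one: targets that look like an
# email address become 'mailto:' links, and anything else is assumed to
# be an 'http://' URL.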
1140 target = re.sub(r'\s', '', target)
1141 if link.tagName=='uri':
1142 if not re.match(r'\w+:', target):
1143 if re.match(r'\w+@(\w+)(\.\w+)*', target):
1144 target = 'mailto:' + target
1145 else:
1146 target = 'http://'+target
1147 elif link.tagName=='link':
1148
1149 target = re.sub(r'\(.*\)$', '', target)
1150 if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
1151 estr = "Bad link target."
1152 errors.append(ColorizingError(estr, token, end))
1153 return
1154
1155
1156 target_elt = doc.createElement('target')
1157 target_elt.appendChild(doc.createTextNode(target))
1158
1159
1160 link.appendChild(name_elt)
1161 link.appendChild(target_elt)
1162
1163
1164
1165
1166
1167 def to_epytext(tree, indent=0, seclevel=0):
1168 """
1169 Convert a DOM document encoding epytext back to an epytext string.
1170 This is the inverse operation of L{parse}. I.e., assuming there
1171 are no errors, the following is true:
1172 - C{parse(to_epytext(tree)) == tree}
1173
1174 The reverse is also true, except that whitespace, line wrapping, and
1175 character escaping may be done differently.
1176 - C{to_epytext(parse(str)) == str} (approximately)
1177
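For example (an illustrative sketch; the output is not shown because the
exact whitespace may differ from the input):

    >>> tree = parse('This is a simple paragraph.')
    >>> round_trip = to_epytext(tree)
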
1178 @param tree: A DOM document encoding of an epytext string.
1179 @type tree: L{xml.dom.minidom.Document}
1180 @param indent: The indentation for the string representation of
1181 C{tree}. Each line of the returned string will begin with
1182 C{indent} space characters.
1183 @type indent: C{int}
1184 @param seclevel: The section level that C{tree} appears at. This
1185 is used to generate section headings.
1186 @type seclevel: C{int}
1187 @return: The epytext string corresponding to C{tree}.
1188 @rtype: C{string}
1189 """
1190 if isinstance(tree, Document):
1191 return to_epytext(tree.childNodes[0], indent, seclevel)
1192 if isinstance(tree, Text):
1193 str = re.sub(r'\{', '\0', tree.data)
1194 str = re.sub(r'\}', '\1', str)
1195 return str
1196
1197 if tree.tagName == 'epytext': indent -= 2
1198 if tree.tagName == 'section': seclevel += 1
1199 variables = [to_epytext(c, indent+2, seclevel) for c in tree.childNodes]
1200 childstr = ''.join(variables)
1201
1202
1203 childstr = re.sub(':(\s*)\2', '::\\1', childstr)
1204
1205 if tree.tagName == 'para':
1206 str = wordwrap(childstr, indent)+'\n'
1207 str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
1208 str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
1209 str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
1210 str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
1211 str = re.sub('\0', 'E{lb}', str)
1212 str = re.sub('\1', 'E{rb}', str)
1213 return str
1214 elif tree.tagName == 'li':
1215 bulletAttr = tree.getAttributeNode('bullet')
1216 if bulletAttr: bullet = bulletAttr.value
1217 else: bullet = '-'
1218 return indent*' '+ bullet + ' ' + childstr.lstrip()
1219 elif tree.tagName == 'heading':
1220 str = re.sub('\0', 'E{lb}',childstr)
1221 str = re.sub('\1', 'E{rb}', str)
1222 uline = len(childstr)*_HEADING_CHARS[seclevel-1]
1223 return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
1224 elif tree.tagName == 'doctestblock':
1225 str = re.sub('\0', '{', childstr)
1226 str = re.sub('\1', '}', str)
1227 lines = [' '+indent*' '+line for line in str.split('\n')]
1228 return '\n'.join(lines) + '\n\n'
1229 elif tree.tagName == 'literalblock':
1230 str = re.sub('\0', '{', childstr)
1231 str = re.sub('\1', '}', str)
1232 lines = [(indent+1)*' '+line for line in str.split('\n')]
1233 return '\2' + '\n'.join(lines) + '\n\n'
1234 elif tree.tagName == 'field':
1235 numargs = 0
1236 while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
1237 tag = variables[0]
1238 args = variables[1:1+numargs]
1239 body = variables[1+numargs:]
1240 str = (indent)*' '+'@'+variables[0]
1241 if args: str += '(' + ', '.join(args) + ')'
1242 return str + ':\n' + ''.join(body)
1243 elif tree.tagName == 'target':
1244 return '<%s>' % childstr
1245 elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext',
1246 'section', 'olist', 'ulist', 'name'):
1247 return childstr
1248 elif tree.tagName == 'symbol':
1249 return 'E{%s}' % childstr
1250 elif tree.tagName == 'graph':
1251 return 'G{%s}' % ' '.join(variables)
1252 else:
1253 for (tag, name) in _COLORIZING_TAGS.items():
1254 if name == tree.tagName:
1255 return '%s{%s}' % (tag, childstr)
1256 raise ValueError('Unknown DOM element %r' % tree.tagName)
1257
1258 def to_plaintext(tree, indent=0, seclevel=0):
1259 """
1260 Convert a DOM document encoding epytext to a string representation.
1261 This representation is similar to the string generated by
1262 C{to_epytext}, but C{to_plaintext} removes inline markup, prints
1263 escaped characters in unescaped form, etc.
1264
1265 @param tree: A DOM document encoding of an epytext string.
1266 @type tree: L{xml.dom.minidom.Document}
1267 @param indent: The indentation for the string representation of
1268 C{tree}. Each line of the returned string will begin with
1269 C{indent} space characters.
1270 @type indent: C{int}
1271 @param seclevel: The section level that C{tree} appears at. This
1272 is used to generate section headings.
1273 @type seclevel: C{int}
1274 @return: The epytext string corresponding to C{tree}.
1275 @rtype: C{string}
1276 """
1277 if isinstance(tree, Document):
1278 return to_plaintext(tree.childNodes[0], indent, seclevel)
1279 if isinstance(tree, Text): return tree.data
1280
1281 if tree.tagName == 'section': seclevel += 1
1282
1283
1284 if tree.tagName == 'epytext': cindent = indent
1285 elif tree.tagName == 'li' and tree.getAttributeNode('bullet'):
1286 cindent = indent + 1 + len(tree.getAttributeNode('bullet').value)
1287 else:
1288 cindent = indent + 2
1289 variables = [to_plaintext(c, cindent, seclevel) for c in tree.childNodes]
1290 childstr = ''.join(variables)
1291
1292 if tree.tagName == 'para':
1293 return wordwrap(childstr, indent)+'\n'
1294 elif tree.tagName == 'li':
1295
1296
1297 bulletAttr = tree.getAttributeNode('bullet')
1298 if bulletAttr: bullet = bulletAttr.value
1299 else: bullet = '-'
1300 return indent*' ' + bullet + ' ' + childstr.lstrip()
1301 elif tree.tagName == 'heading':
1302 uline = len(childstr)*_HEADING_CHARS[seclevel-1]
1303 return ((indent-2)*' ' + childstr + '\n' +
1304 (indent-2)*' ' + uline + '\n')
1305 elif tree.tagName == 'doctestblock':
1306 lines = [(indent+2)*' '+line for line in childstr.split('\n')]
1307 return '\n'.join(lines) + '\n\n'
1308 elif tree.tagName == 'literalblock':
1309 lines = [(indent+1)*' '+line for line in childstr.split('\n')]
1310 return '\n'.join(lines) + '\n\n'
1311 elif tree.tagName == 'fieldlist':
1312 return childstr
1313 elif tree.tagName == 'field':
1314 numargs = 0
1315 while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
1316 tag = variables[0]
1317 args = variables[1:1+numargs]
1318 body = variables[1+numargs:]
1319 str = (indent)*' '+'@'+variables[0]
1320 if args: str += '(' + ', '.join(args) + ')'
1321 return str + ':\n' + ''.join(body)
1322 elif tree.tagName == 'uri':
1323 if len(variables) != 2: raise ValueError('Bad URI ')
1324 elif variables[0] == variables[1]: return '<%s>' % variables[1]
1325 else: return '%r<%s>' % (variables[0], variables[1])
1326 elif tree.tagName == 'link':
1327 if len(variables) != 2: raise ValueError('Bad Link')
1328 return '%s' % variables[0]
1329 elif tree.tagName in ('olist', 'ulist'):
1330
1331
1332
1333
1334 return childstr.replace('\n\n', '\n')+'\n'
1335 elif tree.tagName == 'symbol':
1336 return '%s' % childstr
1337 elif tree.tagName == 'graph':
1338 return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
1339 else:
1340
1341 return childstr
1342
1343 def to_debug(tree, indent=4, seclevel=0):
1344 """
1345 Convert a DOM document encoding epytext back to an epytext string,
1346 annotated with extra debugging information. This function is
1347 similar to L{to_epytext}, but it adds explicit information about
1348 where different blocks begin, along the left margin.
1349
1350 @param tree: A DOM document encoding of an epytext string.
1351 @type tree: L{xml.dom.minidom.Document}
1352 @param indent: The indentation for the string representation of
1353 C{tree}. Each line of the returned string will begin with
1354 C{indent} space characters.
1355 @type indent: C{int}
1356 @param seclevel: The section level that C{tree} appears at. This
1357 is used to generate section headings.
1358 @type seclevel: C{int}
1359 @return: The epytext string corresponding to C{tree}.
1360 @rtype: C{string}
1361 """
1362 if isinstance(tree, Document):
1363 return to_debug(tree.childNodes[0], indent, seclevel)
1364 if isinstance(tree, Text):
1365 str = re.sub(r'\{', '\0', tree.data)
1366 str = re.sub(r'\}', '\1', str)
1367 return str
1368
1369 if tree.tagName == 'section': seclevel += 1
1370 variables = [to_debug(c, indent+2, seclevel) for c in tree.childNodes]
1371 childstr = ''.join(variables)
1372
1373
1374 childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr)
1375
1376 if tree.tagName == 'para':
1377 str = wordwrap(childstr, indent-6, 69)+'\n'
1378 str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
1379 str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
1380 str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
1381 str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
1382 str = re.sub('\0', 'E{lb}', str)
1383 str = re.sub('\1', 'E{rb}', str)
1384 lines = str.rstrip().split('\n')
1385 lines[0] = ' P>|' + lines[0]
1386 lines[1:] = [' |'+l for l in lines[1:]]
1387 return '\n'.join(lines)+'\n |\n'
1388 elif tree.tagName == 'li':
1389 bulletAttr = tree.getAttributeNode('bullet')
1390 if bulletAttr: bullet = bulletAttr.value
1391 else: bullet = '-'
1392 return ' LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
1393 elif tree.tagName in ('olist', 'ulist'):
1394 return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
1395 elif tree.tagName == 'heading':
1396 str = re.sub('\0', 'E{lb}', childstr)
1397 str = re.sub('\1', 'E{rb}', str)
1398 uline = len(childstr)*_HEADING_CHARS[seclevel-1]
1399 return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
1400 ' |'+(indent-8)*' ' + uline + '\n')
1401 elif tree.tagName == 'doctestblock':
1402 str = re.sub('\0', '{', childstr)
1403 str = re.sub('\1', '}', str)
1404 lines = [' |'+(indent-4)*' '+line for line in str.split('\n')]
1405 lines[0] = 'DTST>'+lines[0][5:]
1406 return '\n'.join(lines) + '\n |\n'
1407 elif tree.tagName == 'literalblock':
1408 str = re.sub('\0', '{', childstr)
1409 str = re.sub('\1', '}', str)
1410 lines = [' |'+(indent-5)*' '+line for line in str.split('\n')]
1411 lines[0] = ' LIT>'+lines[0][5:]
1412 return '\2' + '\n'.join(lines) + '\n |\n'
1413 elif tree.tagName == 'field':
1414 numargs = 0
1415 while tree.childNodes[numargs+1].tagName == 'arg': numargs += 1
1416 tag = variables[0]
1417 args = variables[1:1+numargs]
1418 body = variables[1+numargs:]
1419 str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
1420 if args: str += '(' + ', '.join(args) + ')'
1421 return str + ':\n' + ''.join(body)
1422 elif tree.tagName == 'target':
1423 return '<%s>' % childstr
1424 elif tree.tagName in ('fieldlist', 'tag', 'arg', 'epytext',
1425 'section', 'olist', 'ulist', 'name'):
1426 return childstr
1427 elif tree.tagName == 'symbol':
1428 return 'E{%s}' % childstr
1429 elif tree.tagName == 'graph':
1430 return 'G{%s}' % ' '.join(variables)
1431 else:
1432 for (tag, name) in _COLORIZING_TAGS.items():
1433 if name == tree.tagName:
1434 return '%s{%s}' % (tag, childstr)
1435 raise ValueError('Unknown DOM element %r' % tree.tagName)
1436
1437
1438
1439
1440 def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
1441 """
1442 Pretty-parse the string. This parses the string, and catches any
1443 warnings or errors produced. Any warnings and errors are
1444 displayed, and the resulting DOM parse structure is returned.
1445
1446 @param str: The string to parse.
1447 @type str: C{string}
1448 @param show_warnings: Whether or not to display non-fatal errors
1449 generated by parsing C{str}.
1450 @type show_warnings: C{boolean}
1451 @param show_errors: Whether or not to display fatal errors
1452 generated by parsing C{str}.
1453 @type show_errors: C{boolean}
1454 @param stream: The stream that warnings and errors should be
1455 written to.
1456 @type stream: C{stream}
1457 @return: a DOM document encoding the contents of C{str}.
1458 @rtype: L{xml.dom.minidom.Document}
1459 @raise SyntaxError: If any fatal errors were encountered.
1460 """
1461 errors = []
1462 confused = 0
1463 try:
1464 val = parse(str, errors)
1465 warnings = [e for e in errors if not e.is_fatal()]
1466 errors = [e for e in errors if e.is_fatal()]
1467 except:
1468 confused = 1
1469
1470 if not show_warnings: warnings = []
1471 warnings.sort()
1472 errors.sort()
1473 if warnings:
1474 print >>stream, '='*SCRWIDTH
1475 print >>stream, "WARNINGS"
1476 print >>stream, '-'*SCRWIDTH
1477 for warning in warnings:
1478 print >>stream, warning.as_warning()
1479 print >>stream, '='*SCRWIDTH
1480 if errors and show_errors:
1481 if not warnings: print >>stream, '='*SCRWIDTH
1482 print >>stream, "ERRORS"
1483 print >>stream, '-'*SCRWIDTH
1484 for error in errors:
1485 print >>stream, error
1486 print >>stream, '='*SCRWIDTH
1487
1488 if confused: raise
1489 elif errors: raise SyntaxError('Encountered Errors')
1490 else: return val
1491
1492
1493
1494
1495
1497 """
1498 An error generated while tokenizing a formatted documentation
1499 string.
1500 """
1501
1503 """
1504 An error generated while structuring a formatted documentation
1505 string.
1506 """
1507
1509 """
1510 An error generated while colorizing a paragraph.
1511 """
1512 def __init__(self, descr, token, charnum, is_fatal=1):
1513 """
1514 Construct a new colorizing exception.
1515
1516 @param descr: A short description of the error.
1517 @type descr: C{string}
1518 @param token: The token where the error occurred
1519 @type token: L{Token}
1520 @param charnum: The character index of the position in
1521 C{token} where the error occurred.
1522 @type charnum: C{int}
1523 """
1524 ParseError.__init__(self, descr, token.startline, is_fatal)
1525 self.token = token
1526 self.charnum = charnum
1527
1528 CONTEXT_RANGE = 20
def descr(self):
1530 RANGE = self.CONTEXT_RANGE
1531 if self.charnum <= RANGE:
1532 left = self.token.contents[0:self.charnum]
1533 else:
1534 left = '...'+self.token.contents[self.charnum-RANGE:self.charnum]
1535 if (len(self.token.contents)-self.charnum) <= RANGE:
1536 right = self.token.contents[self.charnum:]
1537 else:
1538 right = (self.token.contents[self.charnum:self.charnum+RANGE]
1539 + '...')
1540 return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
1541
1542
1543
1544
1545
1547 """
1548 Return a DOM document matching the epytext DTD, containing a
1549 single literal block. That literal block will include the
1550 contents of the given string. This method is typically used as a
1551 fall-back when the parser fails.
1552
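For example (an illustrative sketch):

    >>> doc = parse_as_literal('print 1 + 1')
    >>> doc.childNodes[0].childNodes[0].tagName
    'literalblock'
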
1553 @param str: The string which should be enclosed in a literal
1554 block.
1555 @type str: C{string}
1556
1557 @return: A DOM document containing C{str} in a single literal
1558 block.
1559 @rtype: L{xml.dom.minidom.Document}
1560 """
1561 doc = Document()
1562 epytext = doc.createElement('epytext')
1563 lit = doc.createElement('literalblock')
1564 doc.appendChild(epytext)
1565 epytext.appendChild(lit)
1566 lit.appendChild(doc.createTextNode(str))
1567 return doc
1568
1570 """
1571 Return a DOM document matching the epytext DTD, containing a
1572 single paragraph. That paragraph will include the contents of the
1573 given string. This can be used to wrap some forms of
1574 automatically generated information (such as type names) in
1575 paragraphs.
1576
1577 @param str: The string which should be enclosed in a paragraph.
1578 @type str: C{string}
1579
1580 @return: A DOM document containing C{str} in a single paragraph.
1581 @rtype: L{xml.dom.minidom.Document}
1582 """
1583 doc = Document()
1584 epytext = doc.createElement('epytext')
1585 para = doc.createElement('para')
1586 doc.appendChild(epytext)
1587 epytext.appendChild(para)
1588 para.appendChild(doc.createTextNode(str))
1589 return doc
1590
1591
1592
1593
1594 from epydoc.docwriter.dotgraph import *
1595
1597 """
1598 Parse the given docstring, which is formatted using epytext, and
1599 return a C{ParsedDocstring} representation of its contents.
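
For example (an illustrative sketch):

    >>> errs = []
    >>> pds = parse_docstring('One B{bold} word.', errs)
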
1600 @param docstring: The docstring to parse
1601 @type docstring: C{string}
1602 @param errors: A list where any errors generated during parsing
1603 will be stored.
1604 @type errors: C{list} of L{ParseError}
1605 @param options: Extra options. Unknown options are ignored.
1606 Currently, no extra options are defined.
1607 @rtype: L{ParsedDocstring}
1608 """
1609 return ParsedEpytextDocstring(parse(docstring, errors))
1610
1611 class ParsedEpytextDocstring(ParsedDocstring):
1612 SYMBOL_TO_HTML = {
1613
1614 '<-': 'larr', '->': 'rarr', '^': 'uarr', 'v': 'darr',
1615
1616
1617 'alpha': 'alpha', 'beta': 'beta', 'gamma': 'gamma',
1618 'delta': 'delta', 'epsilon': 'epsilon', 'zeta': 'zeta',
1619 'eta': 'eta', 'theta': 'theta', 'iota': 'iota',
1620 'kappa': 'kappa', 'lambda': 'lambda', 'mu': 'mu',
1621 'nu': 'nu', 'xi': 'xi', 'omicron': 'omicron',
1622 'pi': 'pi', 'rho': 'rho', 'sigma': 'sigma',
1623 'tau': 'tau', 'upsilon': 'upsilon', 'phi': 'phi',
1624 'chi': 'chi', 'psi': 'psi', 'omega': 'omega',
1625 'Alpha': 'Alpha', 'Beta': 'Beta', 'Gamma': 'Gamma',
1626 'Delta': 'Delta', 'Epsilon': 'Epsilon', 'Zeta': 'Zeta',
1627 'Eta': 'Eta', 'Theta': 'Theta', 'Iota': 'Iota',
1628 'Kappa': 'Kappa', 'Lambda': 'Lambda', 'Mu': 'Mu',
1629 'Nu': 'Nu', 'Xi': 'Xi', 'Omicron': 'Omicron',
1630 'Pi': 'Pi', 'Rho': 'Rho', 'Sigma': 'Sigma',
1631 'Tau': 'Tau', 'Upsilon': 'Upsilon', 'Phi': 'Phi',
1632 'Chi': 'Chi', 'Psi': 'Psi', 'Omega': 'Omega',
1633
1634
1635 'larr': 'larr', 'rarr': 'rarr', 'uarr': 'uarr',
1636 'darr': 'darr', 'harr': 'harr', 'crarr': 'crarr',
1637 'lArr': 'lArr', 'rArr': 'rArr', 'uArr': 'uArr',
1638 'dArr': 'dArr', 'hArr': 'hArr',
1639 'copy': 'copy', 'times': 'times', 'forall': 'forall',
1640 'exist': 'exist', 'part': 'part',
1641 'empty': 'empty', 'isin': 'isin', 'notin': 'notin',
1642 'ni': 'ni', 'prod': 'prod', 'sum': 'sum',
1643 'prop': 'prop', 'infin': 'infin', 'ang': 'ang',
1644 'and': 'and', 'or': 'or', 'cap': 'cap', 'cup': 'cup',
1645 'int': 'int', 'there4': 'there4', 'sim': 'sim',
1646 'cong': 'cong', 'asymp': 'asymp', 'ne': 'ne',
1647 'equiv': 'equiv', 'le': 'le', 'ge': 'ge',
1648 'sub': 'sub', 'sup': 'sup', 'nsub': 'nsub',
1649 'sube': 'sube', 'supe': 'supe', 'oplus': 'oplus',
1650 'otimes': 'otimes', 'perp': 'perp',
1651
1652
1653 'infinity': 'infin', 'integral': 'int', 'product': 'prod',
1654 '<=': 'le', '>=': 'ge',
1655 }
1656
1657 SYMBOL_TO_LATEX = {
1658
1659 '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)',
1660 '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)',
1661
1662
1663
1664 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma':
1665 r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon':
1666 r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)',
1667 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa':
1668 r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)',
1669 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi':
1670 r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau':
1671 r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)',
1672 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega':
1673 r'\(\omega\)',
1674
1675 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma':
1676 r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon':
1677 r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)',
1678 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa':
1679 r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)',
1680 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi':
1681 r'\(\Pi\)', 'Rho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau':
1682 r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)',
1683 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega':
1684 r'\(\Omega\)',
1685
1686
1687 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr':
1688 r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr':
1689 r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)',
1690 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr':
1691 r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr':
1692 r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}',
1693 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist':
1694 r'\(\exists\)', 'part': r'\(\partial\)', 'empty':
1695 r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)',
1696 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)',
1697 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang':
1698 r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap':
1699 r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4':
1700 r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)',
1701 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv':
1702 r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub':
1703 r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\not\subset\)',
1704 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus':
1705 r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)',
1706
1707
1708 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product':
1709 r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)',
1710 }
1711
1712 def __init__(self, dom_tree):
1713 if isinstance(dom_tree, Document):
1714 dom_tree = dom_tree.childNodes[0]
1715 self._tree = dom_tree
1716
1717 self._html = self._latex = self._plaintext = None
1718 self._terms = None
1719
1720 def to_html(self, docstring_linker, directory=None, docindex=None,
1721 context=None, **options):
1722 if self._html is not None: return self._html
1723 if self._tree is None: return ''
1724 indent = options.get('indent', 0)
1725 self._html = self._to_html(self._tree, docstring_linker, directory,
1726 docindex, context, indent)
1727 return self._html
1728
1729 def to_latex(self, docstring_linker, **options):
1730 if self._latex is not None: return self._latex
1731 if self._tree is None: return ''
1732 indent = options.get('indent', 0)
1733 self._hyperref = options.get('hyperref', 1)
1734 self._latex = self._to_latex(self._tree, docstring_linker, indent)
1735 return self._latex
1736
1737 def to_plaintext(self, docstring_linker, **options):
1738
1739
1740 if self._tree is None: return ''
1741 if 'indent' in options:
1742 self._plaintext = to_plaintext(self._tree,
1743 indent=options['indent'])
1744 else:
1745 self._plaintext = to_plaintext(self._tree)
1746 return self._plaintext
1747
1748 def _index_term_key(self, tree):
1749 str = to_plaintext(tree)
1750 str = re.sub(r'\s\s+', '-', str)
1751 return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1752
1753 def _to_html(self, tree, linker, directory, docindex, context,
1754 indent=0, seclevel=0):
1755 if isinstance(tree, Text):
1756 return plaintext_to_html(tree.data)
1757
1758 if tree.tagName == 'epytext': indent -= 2
1759 if tree.tagName == 'section': seclevel += 1
1760
1761
1762 variables = [self._to_html(c, linker, directory, docindex, context,
1763 indent+2, seclevel)
1764 for c in tree.childNodes]
1765
1766
1767
1768 for i in range(len(variables)-1):
1769 if (not isinstance(tree.childNodes[i], Text) and
1770 tree.childNodes[i].tagName == 'para' and
1771 (isinstance(tree.childNodes[i+1], Text) or
1772 tree.childNodes[i+1].tagName != 'para')):
1773 variables[i] = ' '*(indent+2)+variables[i][5+indent:-5]+'\n'
1774 if (tree.hasChildNodes() and
1775 not isinstance(tree.childNodes[-1], Text) and
1776 tree.childNodes[-1].tagName == 'para'):
1777 variables[-1] = ' '*(indent+2)+variables[-1][5+indent:-5]+'\n'
1778
1779
1780 childstr = ''.join(variables)
1781
1782
        if tree.tagName == 'para':
            return wordwrap('<p>%s</p>' % childstr, indent)
        elif tree.tagName == 'code':
            return '<code>%s</code>' % childstr
        elif tree.tagName == 'uri':
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tagName == 'link':
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tagName == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tagName == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tagName == 'indexed':
            term = tree.cloneNode(1)
            term.tagName = 'epytext'
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tagName == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tagName == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tagName == 'olist':
            startAttr = tree.getAttributeNode('start')
            if startAttr: start = ' start="%s"' % startAttr.value
            else: start = ''
            return ('%s<ol%s>\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tagName == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tagName == 'heading':
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tagName == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tagName == 'doctestblock':
            dtb = colorize_doctestblock(childstr.strip())
            return '<pre class="doctestblock">\n%s</pre>\n' % dtb
        elif tree.tagName == 'fieldlist':
            raise AssertionError("There should not be any field lists left")
        elif tree.tagName in ('epytext', 'section', 'tag', 'arg',
                              'name', 'target', 'html'):
            return childstr
        elif tree.tagName == 'symbol':
            symbol = tree.childNodes[0].data
            if self.SYMBOL_TO_HTML.has_key(symbol):
                return '&%s;' % self.SYMBOL_TO_HTML[symbol]
            else:
                return '[??]'
        elif tree.tagName == 'graph':
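            # Build the graph object; graph.to_html() is expected to
            # write the image file and return the HTML markup that
            # references it.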
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tagName)

    def _build_graph(self, graph_type, graph_args, linker,
                     docindex, context):
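        # Note: ClassDoc, ModuleDoc, log, and the graph constructors
        # (class_tree_graph, package_tree_graph, import_graph,
        # call_graph) are not defined in this excerpt; they are assumed
        # to be imported elsewhere in the module (e.g. from
        # epydoc.apidoc, epydoc.log, and epydoc.docwriter.dotgraph).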
        if graph_type == 'classtree':
            if graph_args:
                bases = [docindex.find(name, context)
                         for name in graph_args]
            elif isinstance(context, ClassDoc):
                bases = [context]
            else:
                log.warning("Could not construct class tree: you must "
                            "specify one or more base classes.")
                return None
            return class_tree_graph(bases, linker, context)
        elif graph_type == 'packagetree':
            if graph_args:
                packages = [docindex.find(name, context)
                            for name in graph_args]
            elif isinstance(context, ModuleDoc):
                packages = [context]
            else:
                log.warning("Could not construct package tree: you must "
                            "specify one or more root packages.")
                return None
            return package_tree_graph(packages, linker, context)
        elif graph_type == 'importgraph':
            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
            return import_graph(modules, docindex, linker, context)
        elif graph_type == 'callgraph':
            if graph_args:
                docs = [docindex.find(name, context) for name in graph_args]
                docs = [doc for doc in docs if doc is not None]
            else:
                docs = [context]
            return call_graph(docs, docindex, linker, context)
        else:
            log.warning("Unknown graph type %s" % graph_type)

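    # LaTeX rendering.  Mirrors _to_html(): children are rendered first
    # and combined according to the node's tag.  Literal and doctest
    # blocks become alltt environments, and URI targets are hyperlinked
    # only when the 'hyperref' option was enabled in to_latex().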
    def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
        if isinstance(tree, Text):
            return plaintext_to_latex(tree.data, breakany=breakany)

        if tree.tagName == 'section': seclevel += 1

        if tree.tagName == 'epytext': cindent = indent
        else: cindent = indent + 2
        variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
                     for c in tree.childNodes]
        childstr = ''.join(variables)

        if tree.tagName == 'para':
            return wordwrap(childstr, indent)+'\n'
        elif tree.tagName == 'code':
            return '\\texttt{%s}' % childstr
        elif tree.tagName == 'uri':
            if len(variables) != 2: raise ValueError('Bad URI')
            if self._hyperref:
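                # Take the link target directly from the DOM and undo
                # the LaTeX escaping of '~' and '#', so the raw URI can
                # be passed to \href.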
                uri = tree.childNodes[1].childNodes[0].data
                uri = uri.replace('{\\textasciitilde}', '~')
                uri = uri.replace('\\#', '#')
                if variables[0] == variables[1]:
                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
                else:
                    return ('%s\\footnote{\\href{%s}{%s}}' %
                            (variables[0], uri, variables[1]))
            else:
                if variables[0] == variables[1]:
                    return '\\textit{%s}' % variables[1]
                else:
                    return '%s\\footnote{%s}' % (variables[0], variables[1])
        elif tree.tagName == 'link':
            if len(variables) != 2: raise ValueError('Bad Link')
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tagName == 'italic':
            return '\\textit{%s}' % childstr
        elif tree.tagName == 'math':
            return '\\textit{%s}' % childstr
        elif tree.tagName == 'indexed':
            term = tree.cloneNode(1)
            term.tagName = 'epytext'
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tagName == 'bold':
            return '\\textbf{%s}' % childstr
        elif tree.tagName == 'li':
            return indent*' ' + '\\item ' + childstr.lstrip()
        elif tree.tagName == 'heading':
            return ' '*(indent-2) + '(section) %s\n\n' % childstr
        elif tree.tagName == 'doctestblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tagName == 'literalblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tagName == 'fieldlist':
            return indent*' '+'{omitted fieldlist}\n'
        elif tree.tagName == 'olist':
            return (' '*indent + '\\begin{enumerate}\n\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                    childstr +
                    ' '*indent + '\\end{enumerate}\n\n')
        elif tree.tagName == 'ulist':
            return (' '*indent + '\\begin{itemize}\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                    childstr +
                    ' '*indent + '\\end{itemize}\n\n')
        elif tree.tagName == 'symbol':
            symbol = tree.childNodes[0].data
            if self.SYMBOL_TO_LATEX.has_key(symbol):
                return r'%s' % self.SYMBOL_TO_LATEX[symbol]
            else:
                return '[??]'
        elif tree.tagName == 'graph':
            return '(GRAPH)'
        else:
            return childstr

    def summary(self):
        if self._tree is None: return self
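        # Build a new epytext document that will hold just the summary
        # (the first sentence of the first paragraph).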
        tree = self._tree
        doc = Document()
        epytext = doc.createElement('epytext')
        doc.appendChild(epytext)

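        # Find the first paragraph, descending into sections and lists.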
        variables = tree.childNodes
        while (len(variables) > 0) and (variables[0].tagName != 'para'):
            if variables[0].tagName in ('section', 'ulist', 'olist', 'li'):
                variables = variables[0].childNodes
            else:
                variables = variables[1:]

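        # Special case: if the docstring consists of a single literal
        # block, take the summary from the block's leading text instead.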
        if (len(variables) == 0 and len(tree.childNodes) == 1 and
            tree.childNodes[0].tagName == 'literalblock'):
            str = re.split(r'\n\s*(\n|$).*',
                           tree.childNodes[0].childNodes[0].data, 1)[0]
            variables = [doc.createElement('para')]
            variables[0].appendChild(doc.createTextNode(str))

        # If there is no suitable paragraph, return an empty summary.
        if len(variables) == 0: return ParsedEpytextDocstring(doc)

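        # Extract the first sentence: everything up to and including the
        # first period that is followed by whitespace (or the end of the
        # text).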
        parachildren = variables[0].childNodes
        para = doc.createElement('para')
        epytext.appendChild(para)
        for parachild in parachildren:
            if isinstance(parachild, Text):
                m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', parachild.data)
                if m:
                    para.appendChild(doc.createTextNode(m.group(1)))
                    return ParsedEpytextDocstring(doc)
            para.appendChild(parachild.cloneNode(1))

        return ParsedEpytextDocstring(doc)

    def split_fields(self, errors=None):
        if self._tree is None: return (self, ())
        tree = self._tree.cloneNode(1)
        fields = []

        if (tree.hasChildNodes() and
            tree.childNodes[-1].tagName == 'fieldlist' and
            tree.childNodes[-1].hasChildNodes()):
            field_nodes = tree.childNodes[-1].childNodes
            tree.removeChild(tree.childNodes[-1])

            for field in field_nodes:
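                # The first child of a field is its tag (e.g. 'param').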
                tag = field.childNodes[0].childNodes[0].data.lower()
                field.removeChild(field.childNodes[0])

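                # An optional argument (e.g. a parameter name) may
                # follow the tag.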
                if field.childNodes and field.childNodes[0].tagName == 'arg':
                    arg = field.childNodes[0].childNodes[0].data
                    field.removeChild(field.childNodes[0])
                else:
                    arg = None

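                # The remaining children form the field body; re-root
                # them as an epytext document and wrap the result in a
                # Field.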
                field.tagName = 'epytext'
                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))

        if tree.hasChildNodes() and tree.childNodes[0].hasChildNodes():
            descr = tree
        else:
            descr = None

        return ParsedEpytextDocstring(descr), fields

    def index_terms(self):
        if self._terms is None:
            self._terms = []
            self._index_terms(self._tree, self._terms)
        return self._terms

    def _index_terms(self, tree, terms):
        if tree is None or isinstance(tree, Text):
            return

        if tree.tagName == 'indexed':
            term = tree.cloneNode(1)
            term.tagName = 'epytext'
            terms.append(ParsedEpytextDocstring(term))

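        # Recurse into the children to collect any nested index terms.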
        for child in tree.childNodes:
            self._index_terms(child, terms)