epydoc.markup.javadoc

  1  # 
  2  # javadoc.py: javadoc docstring parsing 
  3  # Edward Loper 
  4  # 
  5  # Created [07/03/03 12:37 PM] 
  6  # $Id: javadoc.py 946 2006-03-10 00:40:50Z edloper $ 
  7  # 
  8   
  9  """ 
 10  Epydoc parser for U{Javadoc<http://java.sun.com/j2se/javadoc/>} 
 11  docstrings.  Javadoc is an HTML-based markup language that was 
 12  developed for documenting Java APIs with inline comments.  It consists 
 13  of raw HTML, augmented by Javadoc tags.  There are two types of 
 14  Javadoc tag: 
 15   
 16    - X{Javadoc block tags} correspond to Epydoc fields.  They are 
 17      marked by starting a line with a string of the form \"C{@M{tag} 
 18      [M{arg}]}\", where C{M{tag}} indicates the type of block, and 
 19      C{M{arg}} is an optional argument.  (For fields that take 
 20      arguments, Javadoc assumes that the single word immediately 
 21      following the tag is an argument; multi-word arguments cannot be 
 22      used with javadoc.)   
 23     
 24    - X{inline Javadoc tags} are used for inline markup.  In particular, 
 25      epydoc uses them for crossreference links between documentation. 
 26      Inline tags may appear anywhere in the text, and have the form 
 27      \"C{{@M{tag} M{[args...]}}}\", where C{M{tag}} indicates the 
 28      type of inline markup, and C{M{args}} are optional arguments. 
 29   
 30  Epydoc supports all Javadoc tags, I{except}: 
 31    - C{{@docRoot}}, which gives the (relative) URL of the generated 
 32      documentation's root. 
 33    - C{{@inheritDoc}}, which copies the documentation of the nearest 
 34      overridden object.  This can be used to combine the documentation 
 35      of the overridden object with the documentation of the 
 36      overriding object. 
 37    - C{@serial}, C{@serialField}, and C{@serialData} which describe the 
 38      serialization (pickling) of an object. 
 39    - C{{@value}}, which copies the value of a constant. 
 40   
 41  @warning: Epydoc only supports HTML output for Javadoc docstrings. 
 42  """ 
 43  __docformat__ = 'epytext en' 
 44   
 45  # Imports 
 46  import re 
 47  from xml.dom.minidom import * 
 48  from epydoc.markup import * 
 49   
 50 -def parse_docstring(docstring, errors, **options): 
 51      """ 
 52      Parse the given docstring, which is formatted using Javadoc; and 
 53      return a C{ParsedDocstring} representation of its contents. 
 54      @param docstring: The docstring to parse 
 55      @type docstring: C{string} 
 56      @param errors: A list where any errors generated during parsing 
 57          will be stored. 
 58      @type errors: C{list} of L{ParseError} 
 59      @param options: Extra options.  Unknown options are ignored. 
 60          Currently, no extra options are defined. 
 61      @rtype: L{ParsedDocstring} 
 62      """ 
 63      return ParsedJavadocDocstring(docstring, errors) 
 64   
 65 -class ParsedJavadocDocstring(ParsedDocstring): 
 66      """ 
 67      An encoded version of a Javadoc docstring.  Since Javadoc is a 
 68      fairly simple markup language, we don't do any processing in 
 69      advance; instead, we wait to split fields or resolve 
 70      crossreference links until we need to. 
 71   
 72      @group Field Splitting: split_fields, _ARG_FIELDS, _FIELD_RE 
 73      @cvar _ARG_FIELDS: A list of the fields that take arguments. 
 74          Since Javadoc doesn't mark arguments in any special way, we 
 75          must consult this list to decide whether the first word of a 
 76          field is an argument or not. 
 77      @cvar _FIELD_RE: A regular expression used to search for Javadoc 
 78          block tags. 
 79   
 80      @group HTML Output: to_html, _LINK_SPLIT_RE, _LINK_RE 
 81      @cvar _LINK_SPLIT_RE: A regular expression used to search for 
 82          Javadoc inline tags. 
 83      @cvar _LINK_RE: A regular expression used to process Javadoc 
 84          inline tags. 
 85      """ 
 86 -    def __init__(self, docstring, errors=None): 
 87          """ 
 88          Create a new C{ParsedJavadocDocstring}. 
 89           
 90          @param docstring: The docstring that should be used to 
 91              construct this C{ParsedJavadocDocstring}. 
 92          @type docstring: C{string} 
 93          @param errors: A list where any errors generated during 
 94              parsing will be stored.  If no list is given, then 
 95              all errors are ignored. 
 96          @type errors: C{list} of L{ParseError} 
 97          """ 
 98          self._docstring = docstring 
 99          if errors is None: errors = [] 
100          self._check_links(errors) 
101   
102      #//////////////////////////////////////////////////////////// 
103      # Field Splitting 
104      #//////////////////////////////////////////////////////////// 
105   
106      _ARG_FIELDS = ('group variable var type cvariable cvar ivariable '+ 
107                     'ivar param '+ 
108                     'parameter arg argument raise raises exception '+ 
109                     'except deffield newfield keyword kwarg kwparam').split() 
110      _FIELD_RE = re.compile(r'(^\s*\@\w+[\s$])', re.MULTILINE) 
111       
112      # Inherit docs from ParsedDocstring. 
113 -    def split_fields(self, errors=None): 
114   
115          # Split the docstring into an alternating list of field tags 
116          # and text (odd pieces are field tags). 
117          pieces = self._FIELD_RE.split(self._docstring) 
118   
119          # The first piece is the description. 
120          descr = ParsedJavadocDocstring(pieces[0]) 
121   
122          # The remaining pieces are the block fields (alternating tags 
123          # and bodies; odd pieces are tags). 
124          fields = [] 
125          for i in range(1, len(pieces)): 
126              if i%2 == 1: 
127                  # Get the field tag. 
128                  tag = pieces[i].strip()[1:] 
129              else: 
130                  # Get the field argument (if appropriate). 
131                  if tag in self._ARG_FIELDS: 
132                      (arg, body) = pieces[i].strip().split(None, 1) 
133                  else: 
134                      (arg, body) = (None, pieces[i]) 
135   
136                  # Special processing for @see fields, since Epydoc 
137                  # allows unrestricted text in them, but Javadoc just 
138                  # uses them for xref links: 
139                  if tag == 'see' and body: 
140                      if body[0] in '"\'': 
141                          if body[-1] == body[0]: body = body[1:-1] 
142                      elif body[0] == '<': pass 
143                      else: body = '{@link %s}' % body 
144   
145                  # Construct the field. 
146                  parsed_body = ParsedJavadocDocstring(body) 
147                  fields.append(Field(tag, arg, parsed_body)) 
148   
149          return (descr, fields) 
150   
151      #//////////////////////////////////////////////////////////// 
152      # HTML Output. 
153      #//////////////////////////////////////////////////////////// 
154   
155      _LINK_SPLIT_RE = re.compile(r'({@link(?:plain)?\s[^}]+})') 
156      _LINK_RE = re.compile(r'{@link(?:plain)?\s+' + r'([\w#.]+)' + 
157                            r'(?:\([^\)]*\))?' + r'(\s+.*)?' + r'}') 
158   
159      # Inherit docs from ParsedDocstring. 
160 -    def to_html(self, docstring_linker, **options): 
161          # Split the docstring into an alternating list of HTML and 
162          # links (odd pieces are links). 
163          pieces = self._LINK_SPLIT_RE.split(self._docstring) 
164   
165          # This function is used to translate {@link ...}s to HTML. 
166          translate_xref = docstring_linker.translate_identifier_xref 
167           
168          # Build up the HTML string from the pieces.  For HTML pieces 
169          # (even), just add it to html.  For link pieces (odd), use 
170          # docstring_linker to translate the crossreference link to 
171          # HTML for us. 
172          html = '' 
173          for i in range(len(pieces)): 
174              if i%2 == 0: 
175                  html += pieces[i] 
176              else: 
177                  # Decompose the link into pieces. 
178                  m = self._LINK_RE.match(pieces[i]) 
179                  if m is None: continue # Error flagged by _check_links 
180                  (target, name) = m.groups() 
181   
182                  # Normalize the target name. 
183                  if target[0] == '#': target = target[1:] 
184                  target = target.replace('#', '.') 
185                  target = re.sub(r'\(.*\)', '', target) 
186   
187                  # Provide a name, if it wasn't specified. 
188                  if name is None: name = target 
189                  else: name = name.strip() 
190   
191                  # Use docstring_linker to convert the name to html. 
192                  html += translate_xref(target, name) 
193          return html 
194   
195 -    def _check_links(self, errors): 
196          """ 
197          Make sure that all @{link}s are valid.  We need a separate 
198          method for ths because we want to do this at parse time, not 
199          html output time.  Any errors found are appended to C{errors}. 
200          """ 
201          pieces = self._LINK_SPLIT_RE.split(self._docstring) 
202          linenum = 0 
203          for i in range(len(pieces)): 
204              if i%2 == 1 and not self._LINK_RE.match(pieces[i]): 
205                  estr = 'Bad link %r' % pieces[i] 
206                  errors.append(ParseError(estr, linenum, is_fatal=0)) 
207              linenum += pieces[i].count('\n') 
208   
209      #//////////////////////////////////////////////////////////// 
210      # Plaintext Output. 
211      #//////////////////////////////////////////////////////////// 
212   
213      # Inherit docs from ParsedDocstring.  Since we don't define 
214      # to_latex, this is used when generating latex output. 
215 -    def to_plaintext(self, docstring_linker, **options): 
216          return self._docstring 
217   
218      # Jeff's hack to get summary working 
219 -    def summary(self): 
220          m = re.match(r'(\s*[\w\W]*?\.)(\s|$)', self._docstring) 
221          if m: 
222              return ParsedJavadocDocstring(m.group(1)) 
223          else: 
224              summary = self._docstring.split('\n', 1)[0]+'...' 
225              return ParsedJavadocDocstring(summary) 
226           
227  #     def concatenate(self, other): 
228  #         if not isinstance(other, ParsedJavadocDocstring): 
229  #             raise ValueError, 'Could not concatenate docstrings' 
230  #         return ParsedJavadocDocstring(self._docstring+other._docstring) 
231
Source Code for Module epydoc.markup.javadoc