diff options
Diffstat (limited to 'tools/lib/python/kdoc/kdoc_parser.py')
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_parser.py | 292 |
1 files changed, 91 insertions, 201 deletions
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index ca00695b47b3..74af7ae47aa4 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,7 +13,8 @@ import sys import re from pprint import pformat -from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.c_lex import CTokenizer, tokenizer_set_log +from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem # @@ -70,140 +71,9 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' cache = False) # -# Here begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. -# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*__private', re.S), ' '), - (KernRe(r'\s*__rcu', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parentheses will be properly matched. - # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), -] -# -# Regexes here are guaranteed to have the end delimiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), -] - -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__exit +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# # Ancillary functions # -def apply_transforms(xforms, text): - """ - Apply a set of transforms to a block of text. - """ - for search, subst in xforms: - text = search.sub(subst, text) - return text - multi_space = KernRe(r'\s\s+') def trim_whitespace(s): """ @@ -215,15 +85,9 @@ def trim_private_members(text): """ Remove ``struct``/``enum`` members that have been marked "private". """ - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. - # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + + tokens = CTokenizer(text) + return str(tokens) class state: """ @@ -276,7 +140,7 @@ class KernelEntry: self.parametertypes = {} self.parameterdesc_start_lines = {} - self.section_start_lines = {} + self.sections_start_lines = {} self.sections = {} self.anon_struct_union = False @@ -356,7 +220,7 @@ class KernelEntry: self.sections[name] += '\n' + contents else: self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line + self.sections_start_lines[name] = self.new_start_line self.new_start_line = 0 # self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) @@ -382,11 +246,15 @@ class KernelDoc: #: String to write when a parameter is not described. undescribed = "-- undescribed --" - def __init__(self, config, fname): + def __init__(self, config, fname, xforms, store_src=False): """Initialize internal variables""" self.fname = fname self.config = config + self.xforms = xforms + self.store_src = store_src + + tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") # Initial state for the state machines self.state = state.NORMAL @@ -449,7 +317,7 @@ class KernelDoc: for section in ["Description", "Return"]: if section in sections and not sections[section].rstrip(): del sections[section] - item.set_sections(sections, self.entry.section_start_lines) + item.set_sections(sections, self.entry.sections_start_lines) item.set_params(self.entry.parameterlist, self.entry.parameterdescs, self.entry.parametertypes, self.entry.parameterdesc_start_lines) @@ -849,13 +717,15 @@ class KernelDoc: return declaration - def dump_struct(self, ln, proto): + def dump_struct(self, ln, proto, source): """ Store an entry for a ``struct`` or ``union`` """ # # Do the basic parse to get the pieces of the declaration. # + source = source + proto = trim_private_members(proto) struct_parts = self.split_struct_proto(proto) if not struct_parts: self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") @@ -869,12 +739,8 @@ class KernelDoc: # # Go through the list of members applying all of our transformations. # - members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) + members = self.xforms.apply("struct", members) - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. # @@ -888,10 +754,11 @@ class KernelDoc: declaration_name) self.check_sections(ln, declaration_name, decl_type) self.output_declaration(decl_type, declaration_name, + source=source, definition=self.format_struct_decl(declaration), purpose=self.entry.declaration_purpose) - def dump_enum(self, ln, proto): + def dump_enum(self, ln, proto, source): """ Store an ``enum`` inside self.entries array. """ @@ -899,6 +766,8 @@ class KernelDoc: # Strip preprocessor directives. Note that this depends on the # trailing semicolon we added in process_proto_type(). # + source = source + proto = trim_private_members(proto) proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) # # Parse out the name and members of the enum. Typedef form first. @@ -906,7 +775,7 @@ class KernelDoc: r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = trim_private_members(r.group(1)) + members = r.group(1) # # Failing that, look for a straight enum # @@ -914,7 +783,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = trim_private_members(r.group(2)) + members = r.group(2) # # OK, this isn't going to work. # @@ -943,9 +812,10 @@ class KernelDoc: member_set = set() members = KernRe(r'\([^;)]*\)').sub('', members) for arg in members.split(','): - if not arg: - continue arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + if not arg.strip(): + continue + self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed @@ -961,29 +831,23 @@ class KernelDoc: f"Excess enum value '@{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, + source=source, purpose=self.entry.declaration_purpose) - def dump_var(self, ln, proto): + def dump_var(self, ln, proto, source): """ Store variables that are part of kAPI. """ VAR_ATTRIBS = [ "extern", + "const", ] - OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" - - sub_prefixes = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), - ] + OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*" # # Store the full prototype before modifying it # + source = source full_proto = proto declaration_name = None @@ -1004,8 +868,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in sub_prefixes: - proto = r.sub(sub, proto) + proto = self.xforms.apply("var", proto) proto = proto.rstrip() @@ -1015,17 +878,17 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) default_val = r.group(2) else: - r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") - if r.match(proto): - default_val = r.group(1) + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + if r.match(proto): + default_val = r.group(1) if not declaration_name: self.emit_msg(ln,f"{proto}: can't parse variable") return @@ -1034,39 +897,38 @@ class KernelDoc: default_val = default_val.lstrip("=").strip() self.output_declaration("var", declaration_name, + source=source, full_proto=full_proto, default_val=default_val, purpose=self.entry.declaration_purpose) - def dump_declaration(self, ln, prototype): + def dump_declaration(self, ln, prototype, source): """ Store a data declaration inside self.entries array. """ if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) + self.dump_enum(ln, prototype, source) elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) + self.dump_typedef(ln, prototype, source) elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) + self.dump_struct(ln, prototype, source) elif self.entry.decl_type == "var": - self.dump_var(ln, prototype) + self.dump_var(ln, prototype, source) else: # This would be a bug self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - def dump_function(self, ln, prototype): + def dump_function(self, ln, prototype, source): """ Store a function or function macro inside self.entries array. """ + source = source found = func_macro = False return_type = '' decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) + # # If we have a macro, remove the "#define" at the front. # @@ -1085,6 +947,11 @@ class KernelDoc: declaration_name = r.group(1) func_macro = True found = True + else: + # + # Apply the initial transformations. + # + prototype = self.xforms.apply("func", prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) @@ -1150,13 +1017,14 @@ class KernelDoc: # Store the result. # self.output_declaration(decl_type, declaration_name, + source=source, typedef=('typedef' in return_type), functiontype=return_type, purpose=self.entry.declaration_purpose, func_macro=func_macro) - def dump_typedef(self, ln, proto): + def dump_typedef(self, ln, proto, source): """ Store a ``typedef`` inside self.entries array. """ @@ -1167,6 +1035,8 @@ class KernelDoc: typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' + source = source + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) @@ -1187,6 +1057,7 @@ class KernelDoc: self.create_parameter_list(ln, 'function', args, ',', declaration_name) self.output_declaration('function', declaration_name, + source=source, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose) @@ -1204,6 +1075,7 @@ class KernelDoc: return self.output_declaration('typedef', declaration_name, + source=source, purpose=self.entry.declaration_purpose) return @@ -1241,7 +1113,7 @@ class KernelDoc: function_set.add(symbol) return True - def process_normal(self, ln, line): + def process_normal(self, ln, line, source): """ STATE_NORMAL: looking for the ``/**`` to begin everything. """ @@ -1255,7 +1127,7 @@ class KernelDoc: # next line is always the function name self.state = state.NAME - def process_name(self, ln, line): + def process_name(self, ln, line, source): """ STATE_NAME: Looking for the "name - description" line """ @@ -1388,7 +1260,7 @@ class KernelDoc: return False - def process_decl(self, ln, line): + def process_decl(self, ln, line, source): """ STATE_DECLARATION: We've seen the beginning of a declaration. """ @@ -1417,7 +1289,7 @@ class KernelDoc: self.emit_msg(ln, f"bad line: {line}") - def process_special(self, ln, line): + def process_special(self, ln, line, source): """ STATE_SPECIAL_SECTION: a section ending with a blank line. """ @@ -1468,7 +1340,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_body(self, ln, line): + def process_body(self, ln, line, source): """ STATE_BODY: the bulk of a kerneldoc comment. """ @@ -1482,7 +1354,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_inline_name(self, ln, line): + def process_inline_name(self, ln, line, source): """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" if doc_inline_sect.search(line): @@ -1495,9 +1367,15 @@ class KernelDoc: elif doc_content.search(line): self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") self.state = state.PROTO + + # + # Don't let it add partial comments at the code, as breaks the + # logic meant to remove comments from prototypes. + # + self.process_proto_type(ln, "/**\n" + line, source) # else ... ?? - def process_inline_text(self, ln, line): + def process_inline_text(self, ln, line, source): """STATE_INLINE_TEXT: docbook comments within a prototype.""" if doc_inline_end.search(line): @@ -1583,7 +1461,7 @@ class KernelDoc: return proto - def process_proto_function(self, ln, line): + def process_proto_function(self, ln, line, source): """Ancillary routine to process a function prototype.""" # strip C99-style comments to end of line @@ -1625,10 +1503,10 @@ class KernelDoc: # # ... and we're done # - self.dump_function(ln, self.entry.prototype) + self.dump_function(ln, self.entry.prototype, source) self.reset_state(ln) - def process_proto_type(self, ln, line): + def process_proto_type(self, ln, line, source): """ Ancillary routine to process a type. """ @@ -1658,7 +1536,7 @@ class KernelDoc: elif chunk == '}': self.entry.brcount -= 1 elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) + self.dump_declaration(ln, self.entry.prototype, source) self.reset_state(ln) return # @@ -1667,7 +1545,7 @@ class KernelDoc: # self.entry.prototype += ' ' - def process_proto(self, ln, line): + def process_proto(self, ln, line, source): """STATE_PROTO: reading a function/whatever prototype.""" if doc_inline_oneline.search(line): @@ -1679,17 +1557,18 @@ class KernelDoc: self.state = state.INLINE_NAME elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) + self.process_proto_function(ln, line, source) else: - self.process_proto_type(ln, line) + self.process_proto_type(ln, line, source) - def process_docblock(self, ln, line): + def process_docblock(self, ln, line, source): """STATE_DOCBLOCK: within a ``DOC:`` block.""" if doc_end.search(line): self.dump_section() - self.output_declaration("doc", self.entry.identifier) + self.output_declaration("doc", self.entry.identifier, + source=source) self.reset_state(ln) elif doc_content.search(line): @@ -1740,6 +1619,8 @@ class KernelDoc: prev = "" prev_ln = None export_table = set() + self.state = state.NORMAL + source = "" try: with open(self.fname, "r", encoding="utf8", @@ -1766,6 +1647,12 @@ class KernelDoc: ln, state.name[self.state], line) + if self.store_src: + if source and self.state == state.NORMAL: + source = "" + elif self.state != state.NORMAL: + source += line + "\n" + # This is an optimization over the original script. # There, when export_file was used for the same file, # it was read twice. Here, we use the already-existing @@ -1773,8 +1660,11 @@ class KernelDoc: # if (self.state != state.NORMAL) or \ not self.process_export(export_table, line): + prev_state = self.state # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) + self.state_actions[self.state](self, ln, line, source) + if prev_state == state.NORMAL and self.state != state.NORMAL: + source += line + "\n" self.emit_unused_warnings() |
