summaryrefslogtreecommitdiff
path: root/tools/lib/python/kdoc/kdoc_parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lib/python/kdoc/kdoc_parser.py')
-rw-r--r--tools/lib/python/kdoc/kdoc_parser.py292
1 files changed, 91 insertions, 201 deletions
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index ca00695b47b3..74af7ae47aa4 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,8 @@ import sys
import re
from pprint import pformat
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.c_lex import CTokenizer, tokenizer_set_log
+from kdoc.kdoc_re import KernRe
from kdoc.kdoc_item import KdocItem
#
@@ -70,140 +71,9 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * '
cache = False)
#
-# Here begins a long set of transformations to turn structure member prefixes
-# and macro invocations into something we can parse and generate kdoc for.
-#
-struct_args_pattern = r'([^,)]+)'
-
-struct_xforms = [
- # Strip attributes
- (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
- (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__packed\s*', re.S), ' '),
- (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
- (KernRe(r'\s*__private', re.S), ' '),
- (KernRe(r'\s*__rcu', re.S), ' '),
- (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
- (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
- (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
- #
- # Unwrap struct_group macros based on this definition:
- # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
- # which has variants like: struct_group(NAME, MEMBERS...)
- # Only MEMBERS arguments require documentation.
- #
- # Parsing them happens on two steps:
- #
- # 1. drop struct group arguments that aren't at MEMBERS,
- # storing them as STRUCT_GROUP(MEMBERS)
- #
- # 2. remove STRUCT_GROUP() ancillary macro.
- #
- # The original logic used to remove STRUCT_GROUP() using an
- # advanced regex:
- #
- # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
- #
- # with two patterns that are incompatible with
- # Python re module, as it has:
- #
- # - a recursive pattern: (?1)
- # - an atomic grouping: (?>...)
- #
- # I tried a simpler version: but it didn't work either:
- # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
- #
- # As it doesn't properly match the end parenthesis on some cases.
- #
- # So, a better solution was crafted: there's now a NestedMatch
- # class that ensures that delimiters after a search are properly
- # matched. So, the implementation to drop STRUCT_GROUP() will be
- # handled in separate.
- #
- (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
- (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
- (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
- (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
- #
- # Replace macros
- #
- # TODO: use NestedMatch for FOO($1, $2, ...) matches
- #
- # it is better to also move those to the NestedMatch logic,
- # to ensure that parentheses will be properly matched.
- #
- (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
- r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
- (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
- r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
- (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
- re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
- (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
- re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
- (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
- r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
- (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
- struct_args_pattern + r'\)', re.S), r'\2 *\1'),
- (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
- struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
- (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
- (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
-]
-#
-# Regexes here are guaranteed to have the end delimiter matching
-# the start delimiter. Yet, right now, only one replace group
-# is allowed.
-#
-struct_nested_prefixes = [
- (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
-]
-
-#
-# Transforms for function prototypes
-#
-function_xforms = [
- (KernRe(r"^static +"), ""),
- (KernRe(r"^extern +"), ""),
- (KernRe(r"^asmlinkage +"), ""),
- (KernRe(r"^inline +"), ""),
- (KernRe(r"^__inline__ +"), ""),
- (KernRe(r"^__inline +"), ""),
- (KernRe(r"^__always_inline +"), ""),
- (KernRe(r"^noinline +"), ""),
- (KernRe(r"^__FORTIFY_INLINE +"), ""),
- (KernRe(r"__init +"), ""),
- (KernRe(r"__init_or_module +"), ""),
- (KernRe(r"__exit +"), ""),
- (KernRe(r"__deprecated +"), ""),
- (KernRe(r"__flatten +"), ""),
- (KernRe(r"__meminit +"), ""),
- (KernRe(r"__must_check +"), ""),
- (KernRe(r"__weak +"), ""),
- (KernRe(r"__sched +"), ""),
- (KernRe(r"_noprof"), ""),
- (KernRe(r"__always_unused *"), ""),
- (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
- (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
- (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
- (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
- (KernRe(r"__attribute_const__ +"), ""),
- (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
-]
-
-#
# Ancillary functions
#
-def apply_transforms(xforms, text):
- """
- Apply a set of transforms to a block of text.
- """
- for search, subst in xforms:
- text = search.sub(subst, text)
- return text
-
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
"""
@@ -215,15 +85,9 @@ def trim_private_members(text):
"""
Remove ``struct``/``enum`` members that have been marked "private".
"""
- # First look for a "public:" block that ends a private region, then
- # handle the "private until the end" case.
- #
- text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
- text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
- #
- # We needed the comments to do the above, but now we can take them out.
- #
- return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+ tokens = CTokenizer(text)
+ return str(tokens)
class state:
"""
@@ -276,7 +140,7 @@ class KernelEntry:
self.parametertypes = {}
self.parameterdesc_start_lines = {}
- self.section_start_lines = {}
+ self.sections_start_lines = {}
self.sections = {}
self.anon_struct_union = False
@@ -356,7 +220,7 @@ class KernelEntry:
self.sections[name] += '\n' + contents
else:
self.sections[name] = contents
- self.section_start_lines[name] = self.new_start_line
+ self.sections_start_lines[name] = self.new_start_line
self.new_start_line = 0
# self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
@@ -382,11 +246,15 @@ class KernelDoc:
#: String to write when a parameter is not described.
undescribed = "-- undescribed --"
- def __init__(self, config, fname):
+ def __init__(self, config, fname, xforms, store_src=False):
"""Initialize internal variables"""
self.fname = fname
self.config = config
+ self.xforms = xforms
+ self.store_src = store_src
+
+ tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
# Initial state for the state machines
self.state = state.NORMAL
@@ -449,7 +317,7 @@ class KernelDoc:
for section in ["Description", "Return"]:
if section in sections and not sections[section].rstrip():
del sections[section]
- item.set_sections(sections, self.entry.section_start_lines)
+ item.set_sections(sections, self.entry.sections_start_lines)
item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
self.entry.parametertypes,
self.entry.parameterdesc_start_lines)
@@ -849,13 +717,15 @@ class KernelDoc:
return declaration
- def dump_struct(self, ln, proto):
+ def dump_struct(self, ln, proto, source):
"""
Store an entry for a ``struct`` or ``union``
"""
#
# Do the basic parse to get the pieces of the declaration.
#
+ source = source
+ proto = trim_private_members(proto)
struct_parts = self.split_struct_proto(proto)
if not struct_parts:
self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
@@ -869,12 +739,8 @@ class KernelDoc:
#
# Go through the list of members applying all of our transformations.
#
- members = trim_private_members(members)
- members = apply_transforms(struct_xforms, members)
+ members = self.xforms.apply("struct", members)
- nested = NestedMatch()
- for search, sub in struct_nested_prefixes:
- members = nested.sub(search, sub, members)
#
# Deal with embedded struct and union members, and drop enums entirely.
#
@@ -888,10 +754,11 @@ class KernelDoc:
declaration_name)
self.check_sections(ln, declaration_name, decl_type)
self.output_declaration(decl_type, declaration_name,
+ source=source,
definition=self.format_struct_decl(declaration),
purpose=self.entry.declaration_purpose)
- def dump_enum(self, ln, proto):
+ def dump_enum(self, ln, proto, source):
"""
Store an ``enum`` inside self.entries array.
"""
@@ -899,6 +766,8 @@ class KernelDoc:
# Strip preprocessor directives. Note that this depends on the
# trailing semicolon we added in process_proto_type().
#
+ source = source
+ proto = trim_private_members(proto)
proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
#
# Parse out the name and members of the enum. Typedef form first.
@@ -906,7 +775,7 @@ class KernelDoc:
r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
if r.search(proto):
declaration_name = r.group(2)
- members = trim_private_members(r.group(1))
+ members = r.group(1)
#
# Failing that, look for a straight enum
#
@@ -914,7 +783,7 @@ class KernelDoc:
r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
if r.match(proto):
declaration_name = r.group(1)
- members = trim_private_members(r.group(2))
+ members = r.group(2)
#
# OK, this isn't going to work.
#
@@ -943,9 +812,10 @@ class KernelDoc:
member_set = set()
members = KernRe(r'\([^;)]*\)').sub('', members)
for arg in members.split(','):
- if not arg:
- continue
arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+ if not arg.strip():
+ continue
+
self.entry.parameterlist.append(arg)
if arg not in self.entry.parameterdescs:
self.entry.parameterdescs[arg] = self.undescribed
@@ -961,29 +831,23 @@ class KernelDoc:
f"Excess enum value '@{k}' description in '{declaration_name}'")
self.output_declaration('enum', declaration_name,
+ source=source,
purpose=self.entry.declaration_purpose)
- def dump_var(self, ln, proto):
+ def dump_var(self, ln, proto, source):
"""
Store variables that are part of kAPI.
"""
VAR_ATTRIBS = [
"extern",
+ "const",
]
- OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"
-
- sub_prefixes = [
- (KernRe(r"__read_mostly"), ""),
- (KernRe(r"__ro_after_init"), ""),
- (KernRe(r"(?://.*)$"), ""),
- (KernRe(r"(?:/\*.*\*/)"), ""),
- (KernRe(r";$"), ""),
- (KernRe(r"=.*"), ""),
- ]
+ OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
#
# Store the full prototype before modifying it
#
+ source = source
full_proto = proto
declaration_name = None
@@ -1004,8 +868,7 @@ class KernelDoc:
# Drop comments and macros to have a pure C prototype
#
if not declaration_name:
- for r, sub in sub_prefixes:
- proto = r.sub(sub, proto)
+ proto = self.xforms.apply("var", proto)
proto = proto.rstrip()
@@ -1015,17 +878,17 @@ class KernelDoc:
default_val = None
- r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+ r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
if r.match(proto):
if not declaration_name:
declaration_name = r.group(1)
default_val = r.group(2)
else:
- r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
- if r.match(proto):
- default_val = r.group(1)
+ r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+ if r.match(proto):
+ default_val = r.group(1)
if not declaration_name:
self.emit_msg(ln,f"{proto}: can't parse variable")
return
@@ -1034,39 +897,38 @@ class KernelDoc:
default_val = default_val.lstrip("=").strip()
self.output_declaration("var", declaration_name,
+ source=source,
full_proto=full_proto,
default_val=default_val,
purpose=self.entry.declaration_purpose)
- def dump_declaration(self, ln, prototype):
+ def dump_declaration(self, ln, prototype, source):
"""
Store a data declaration inside self.entries array.
"""
if self.entry.decl_type == "enum":
- self.dump_enum(ln, prototype)
+ self.dump_enum(ln, prototype, source)
elif self.entry.decl_type == "typedef":
- self.dump_typedef(ln, prototype)
+ self.dump_typedef(ln, prototype, source)
elif self.entry.decl_type in ["union", "struct"]:
- self.dump_struct(ln, prototype)
+ self.dump_struct(ln, prototype, source)
elif self.entry.decl_type == "var":
- self.dump_var(ln, prototype)
+ self.dump_var(ln, prototype, source)
else:
# This would be a bug
self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
- def dump_function(self, ln, prototype):
+ def dump_function(self, ln, prototype, source):
"""
Store a function or function macro inside self.entries array.
"""
+ source = source
found = func_macro = False
return_type = ''
decl_type = 'function'
- #
- # Apply the initial transformations.
- #
- prototype = apply_transforms(function_xforms, prototype)
+
#
# If we have a macro, remove the "#define" at the front.
#
@@ -1085,6 +947,11 @@ class KernelDoc:
declaration_name = r.group(1)
func_macro = True
found = True
+ else:
+ #
+ # Apply the initial transformations.
+ #
+ prototype = self.xforms.apply("func", prototype)
# Yes, this truly is vile. We are looking for:
# 1. Return type (may be nothing if we're looking at a macro)
@@ -1150,13 +1017,14 @@ class KernelDoc:
# Store the result.
#
self.output_declaration(decl_type, declaration_name,
+ source=source,
typedef=('typedef' in return_type),
functiontype=return_type,
purpose=self.entry.declaration_purpose,
func_macro=func_macro)
- def dump_typedef(self, ln, proto):
+ def dump_typedef(self, ln, proto, source):
"""
Store a ``typedef`` inside self.entries array.
"""
@@ -1167,6 +1035,8 @@ class KernelDoc:
typedef_ident = r'\*?\s*(\w\S+)\s*'
typedef_args = r'\s*\((.*)\);'
+ source = source
+
typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
@@ -1187,6 +1057,7 @@ class KernelDoc:
self.create_parameter_list(ln, 'function', args, ',', declaration_name)
self.output_declaration('function', declaration_name,
+ source=source,
typedef=True,
functiontype=return_type,
purpose=self.entry.declaration_purpose)
@@ -1204,6 +1075,7 @@ class KernelDoc:
return
self.output_declaration('typedef', declaration_name,
+ source=source,
purpose=self.entry.declaration_purpose)
return
@@ -1241,7 +1113,7 @@ class KernelDoc:
function_set.add(symbol)
return True
- def process_normal(self, ln, line):
+ def process_normal(self, ln, line, source):
"""
STATE_NORMAL: looking for the ``/**`` to begin everything.
"""
@@ -1255,7 +1127,7 @@ class KernelDoc:
# next line is always the function name
self.state = state.NAME
- def process_name(self, ln, line):
+ def process_name(self, ln, line, source):
"""
STATE_NAME: Looking for the "name - description" line
"""
@@ -1388,7 +1260,7 @@ class KernelDoc:
return False
- def process_decl(self, ln, line):
+ def process_decl(self, ln, line, source):
"""
STATE_DECLARATION: We've seen the beginning of a declaration.
"""
@@ -1417,7 +1289,7 @@ class KernelDoc:
self.emit_msg(ln, f"bad line: {line}")
- def process_special(self, ln, line):
+ def process_special(self, ln, line, source):
"""
STATE_SPECIAL_SECTION: a section ending with a blank line.
"""
@@ -1468,7 +1340,7 @@ class KernelDoc:
# Unknown line, ignore
self.emit_msg(ln, f"bad line: {line}")
- def process_body(self, ln, line):
+ def process_body(self, ln, line, source):
"""
STATE_BODY: the bulk of a kerneldoc comment.
"""
@@ -1482,7 +1354,7 @@ class KernelDoc:
# Unknown line, ignore
self.emit_msg(ln, f"bad line: {line}")
- def process_inline_name(self, ln, line):
+ def process_inline_name(self, ln, line, source):
"""STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
if doc_inline_sect.search(line):
@@ -1495,9 +1367,15 @@ class KernelDoc:
elif doc_content.search(line):
self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
self.state = state.PROTO
+
+ #
+ # Don't let it add partial comments at the code, as breaks the
+ # logic meant to remove comments from prototypes.
+ #
+ self.process_proto_type(ln, "/**\n" + line, source)
# else ... ??
- def process_inline_text(self, ln, line):
+ def process_inline_text(self, ln, line, source):
"""STATE_INLINE_TEXT: docbook comments within a prototype."""
if doc_inline_end.search(line):
@@ -1583,7 +1461,7 @@ class KernelDoc:
return proto
- def process_proto_function(self, ln, line):
+ def process_proto_function(self, ln, line, source):
"""Ancillary routine to process a function prototype."""
# strip C99-style comments to end of line
@@ -1625,10 +1503,10 @@ class KernelDoc:
#
# ... and we're done
#
- self.dump_function(ln, self.entry.prototype)
+ self.dump_function(ln, self.entry.prototype, source)
self.reset_state(ln)
- def process_proto_type(self, ln, line):
+ def process_proto_type(self, ln, line, source):
"""
Ancillary routine to process a type.
"""
@@ -1658,7 +1536,7 @@ class KernelDoc:
elif chunk == '}':
self.entry.brcount -= 1
elif chunk == ';' and self.entry.brcount <= 0:
- self.dump_declaration(ln, self.entry.prototype)
+ self.dump_declaration(ln, self.entry.prototype, source)
self.reset_state(ln)
return
#
@@ -1667,7 +1545,7 @@ class KernelDoc:
#
self.entry.prototype += ' '
- def process_proto(self, ln, line):
+ def process_proto(self, ln, line, source):
"""STATE_PROTO: reading a function/whatever prototype."""
if doc_inline_oneline.search(line):
@@ -1679,17 +1557,18 @@ class KernelDoc:
self.state = state.INLINE_NAME
elif self.entry.decl_type == 'function':
- self.process_proto_function(ln, line)
+ self.process_proto_function(ln, line, source)
else:
- self.process_proto_type(ln, line)
+ self.process_proto_type(ln, line, source)
- def process_docblock(self, ln, line):
+ def process_docblock(self, ln, line, source):
"""STATE_DOCBLOCK: within a ``DOC:`` block."""
if doc_end.search(line):
self.dump_section()
- self.output_declaration("doc", self.entry.identifier)
+ self.output_declaration("doc", self.entry.identifier,
+ source=source)
self.reset_state(ln)
elif doc_content.search(line):
@@ -1740,6 +1619,8 @@ class KernelDoc:
prev = ""
prev_ln = None
export_table = set()
+ self.state = state.NORMAL
+ source = ""
try:
with open(self.fname, "r", encoding="utf8",
@@ -1766,6 +1647,12 @@ class KernelDoc:
ln, state.name[self.state],
line)
+ if self.store_src:
+ if source and self.state == state.NORMAL:
+ source = ""
+ elif self.state != state.NORMAL:
+ source += line + "\n"
+
# This is an optimization over the original script.
# There, when export_file was used for the same file,
# it was read twice. Here, we use the already-existing
@@ -1773,8 +1660,11 @@ class KernelDoc:
#
if (self.state != state.NORMAL) or \
not self.process_export(export_table, line):
+ prev_state = self.state
# Hand this line to the appropriate state handler
- self.state_actions[self.state](self, ln, line)
+ self.state_actions[self.state](self, ln, line, source)
+ if prev_state == state.NORMAL and self.state != state.NORMAL:
+ source += line + "\n"
self.emit_unused_warnings()