From 2b144a30a407d29b7e6d24549f5316175115e788 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:44 +0100 Subject: docs: kdoc_re: add support for groups() Add an equivalent to re groups() method. This is useful on debug messages. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <20d1a9c77200e28cc2ff1d6122635c43f8ba6a71.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_re.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 0bf9e01cdc57..774dd747ecb0 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -106,6 +106,13 @@ class KernRe: return self.last_match.group(num) + def groups(self): + """ + Returns the group results of the last match + """ + + return self.last_match.groups() + class NestedMatch: """ -- cgit v1.2.3 From 8eb49357ffa229c9b65a002f655c1280dc09769a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:45 +0100 Subject: docs: kdoc_re: don't go past the end of a line The logic which checks if the line ends with ";" is currently broken: it may try to read past the buffer. Fix it by checking before trying to access line[pos]. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_re.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 774dd747ecb0..6c44fcce0415 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -269,7 +269,7 @@ class NestedMatch: out += new_sub # Drop end ';' if any - if line[pos] == ';': + if pos < len(line) and line[pos] == ';': pos += 1 cur_pos = pos -- cgit v1.2.3 From 77e6e17e9fc4cb4e59ad97de5453bb6f963a5fd4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:46 +0100 Subject: docs: kdoc_parser: move var transformers to the beginning Just like functions and structs had their transform variables placed at the beginning, move variable transforms to there as well. No functional changes. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <491b290252a308f381f88353a3bbe9e2bd1f6a62.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index ca00695b47b3..68a5aea9175d 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -192,6 +192,18 @@ function_xforms = [ (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), ] +# +# Transforms for variable prototypes +# +var_xforms = [ + (KernRe(r"__read_mostly"), ""), + (KernRe(r"__ro_after_init"), ""), + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + (KernRe(r"=.*"), ""), +] + # # Ancillary functions # @@ -972,15 +984,6 @@ class KernelDoc: ] OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" - sub_prefixes = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), - ] - # # Store the full prototype before modifying it # @@ -1004,7 +1007,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in sub_prefixes: + for r, sub in var_xforms: proto = r.sub(sub, proto) proto = proto.rstrip() -- cgit v1.2.3 From cca1bbdd72f72a3cf86d90fd6f326fd709ae931f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:47 +0100 Subject: docs: kdoc_parser: don't mangle with function defines Mangling with #defines is not nice, as we may end removing the macro names, preventing several macros from being properly documented. Also, on defines, we have something like: #define foo(a1, a2, a3, ...) \ /* some real implementation */ The prototype part (first line on this example) won't contain any macros, so no need to apply any regexes on it. With that, move the apply_transforms() logic to ensure that it will be called only on functions. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <8f9854c8ca1c794b6a3fe418f7adbc32aa68b432.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 68a5aea9175d..9643ffb7584a 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -163,7 +163,7 @@ struct_nested_prefixes = [ # # Transforms for function prototypes # -function_xforms = [ +function_xforms = [ (KernRe(r"^static +"), ""), (KernRe(r"^extern +"), ""), (KernRe(r"^asmlinkage +"), ""), @@ -1066,10 +1066,7 @@ class KernelDoc: found = func_macro = False return_type = '' decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) + # # If we have a macro, remove the "#define" at the front. # @@ -1088,6 +1085,11 @@ class KernelDoc: declaration_name = r.group(1) func_macro = True found = True + else: + # + # Apply the initial transformations. + # + prototype = apply_transforms(function_xforms, prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) -- cgit v1.2.3 From 4fd349f03dc51bc2f9cd2ea9f6309b0bc2b848ca Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:48 +0100 Subject: docs: kdoc_parser: fix variable regexes to work with size_t The regular expressions meant to pick variable types are too naive: they forgot that the type word may contain underlines. It also means that we need to change the regex which detects var attributes to handle "const". Co-developed-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <8230715239929cf9d475ab81ca1df7de65d82d06.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 9643ffb7584a..9c9443281c40 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -981,8 +981,9 @@ class KernelDoc: """ VAR_ATTRIBS = [ "extern", + "const", ] - OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" + OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*" # # Store the full prototype before modifying it @@ -1018,14 +1019,14 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) default_val = r.group(2) else: - r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): default_val = r.group(1) -- cgit v1.2.3 From 9bbf22b87d866fa1e6a1f9f6376d2ef458b6dcc7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:49 +0100 Subject: docs: kdoc_parser: fix the default_value logic for variables The indentation is wrong for the second regex, which causes problems on variables with defaults. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <681f18338abd6ae33cb9c15d72bb31a1cba75a9a.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 9c9443281c40..4bf55244870f 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1027,9 +1027,9 @@ class KernelDoc: default_val = r.group(2) else: r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") - if r.match(proto): - default_val = r.group(1) + if r.match(proto): + default_val = r.group(1) if not declaration_name: self.emit_msg(ln,f"{proto}: can't parse variable") return -- cgit v1.2.3 From b7dc635459ad5b00f2d482406dbdca3291622ce2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:50 +0100 Subject: docs: kdoc_parser: don't exclude defaults from prototype If we do that, the defaults won't be parsed. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_parser.py | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 4bf55244870f..39ff27d421eb 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -201,7 +201,6 @@ var_xforms = [ (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), ] # -- cgit v1.2.3 From 6d9c2e9575b8630e17571a77eef8ade84a2a6344 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:51 +0100 Subject: docs: kdoc_parser: fix parser to support multi-word types The regular expression currently expects a single word for the type, but it may be something like "struct foo". Add support for it. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <544c73a9e670b6fef1828bf4f2ba0de7d29d8675.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 39ff27d421eb..22a820d33dc8 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1018,14 +1018,14 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) default_val = r.group(2) else: - r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): default_val = r.group(1) -- cgit v1.2.3 From 9bff5121fe22fdd0bb5bd6f744e136ec20bf7b95 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:52 +0100 Subject: docs: kdoc_parser: add support for LIST_HEAD Convert LIST_HEAD into struct list_head when handling its prototype. Signed-off-by: Mauro Carvalho Chehab Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <8bdfa6ba6002b0a73a83660f0ce7b40e30124552.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 22a820d33dc8..1df869061bf3 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -198,6 +198,7 @@ function_xforms = [ var_xforms = [ (KernRe(r"__read_mostly"), ""), (KernRe(r"__ro_after_init"), ""), + (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), -- cgit v1.2.3 From 97d4e70bc2c6f75911a9a5e1a75f2de13fde9b6b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Mar 2026 17:40:53 +0100 Subject: docs: kdoc_parser: handle struct member macro VIRTIO_DECLARE_FEATURES(name) Parse the macro VIRTIO_DECLARE_FEATURES(name) and expand it to its definition. These prevents one build warning: WARNING: include/linux/virtio.h:188 struct member 'VIRTIO_DECLARE_FEATURES(features' not described in 'virtio_device' Signed-off-by: Randy Dunlap Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <6f62e1f1210e74906fa50f4e937f66f54813661b.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 1df869061bf3..917e4528bfbf 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -150,6 +150,7 @@ struct_xforms = [ struct_args_pattern + r'\)', re.S), r'\1 \2[]'), (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), ] # # Regexes here are guaranteed to have the end delimiter matching -- cgit v1.2.3 From 95a9429cc6d31371575793ab7beb94bf3e7a2f92 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:54 +0100 Subject: docs: kdoc_re: better show KernRe() at documentation the __repr__() function is used by autodoc to document macro initialization. Add a better representation for them. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <80d27732368c14125c1b76048a70d8b4aee527ef.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_re.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 6c44fcce0415..664c04c8cc9f 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -52,7 +52,28 @@ class KernRe: return self.regex.pattern def __repr__(self): - return f're.compile("{self.regex.pattern}")' + """ + Returns a displayable version of the class init. + """ + + flag_map = { + re.IGNORECASE: "re.I", + re.MULTILINE: "re.M", + re.DOTALL: "re.S", + re.VERBOSE: "re.X", + } + + flags = [] + for flag, name in flag_map.items(): + if self.regex.flags & flag: + flags.append(name) + + flags_name = " | ".join(flags) + + if flags_name: + return f'KernRe("{self.regex.pattern}", {flags_name})' + else: + return f'KernRe("{self.regex.pattern}")' def __add__(self, other): """ -- cgit v1.2.3 From d842057c4a205084fb3036122c7426963f04e826 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:55 +0100 Subject: docs: kdoc_parser: move transform lists to a separate file Over the time, most of the changes at kernel-doc are related to maintaining a list of transforms to convert macros into pure C code. Place such transforms on a separate module, to cleanup the parser module. There is an advantage on that: QEMU also uses our own kernel-doc, but the xforms list there is different. By placing it on a separate module, we can minimize the differences and make it easier to keep QEMU in sync with Kernel upstream. No functional changes. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_files.py | 3 +- tools/lib/python/kdoc/kdoc_parser.py | 145 ++------------------------------ tools/lib/python/kdoc/xforms_lists.py | 153 ++++++++++++++++++++++++++++++++++ 3 files changed, 160 insertions(+), 141 deletions(-) create mode 100644 tools/lib/python/kdoc/xforms_lists.py (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 022487ea2cc6..33618c6abec2 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -15,6 +15,7 @@ import os import re from kdoc.kdoc_parser import KernelDoc +from kdoc.xforms_lists import CTransforms from kdoc.kdoc_output import OutputFormat @@ -117,7 +118,7 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname) + doc = KernelDoc(self.config, fname, CTransforms()) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 917e4528bfbf..d7daf658e9d2 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -69,89 +69,6 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' r'(?:[-:].*)?$', # description (not captured) cache = False) -# -# Here begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. -# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*__private', re.S), ' '), - (KernRe(r'\s*__rcu', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parentheses will be properly matched. - # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), - (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), -] # # Regexes here are guaranteed to have the end delimiter matching # the start delimiter. Yet, right now, only one replace group @@ -161,62 +78,10 @@ struct_nested_prefixes = [ (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__exit +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# -# Transforms for variable prototypes -# -var_xforms = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), -] - # # Ancillary functions # -def apply_transforms(xforms, text): - """ - Apply a set of transforms to a block of text. - """ - for search, subst in xforms: - text = search.sub(subst, text) - return text - multi_space = KernRe(r'\s\s+') def trim_whitespace(s): """ @@ -395,11 +260,12 @@ class KernelDoc: #: String to write when a parameter is not described. undescribed = "-- undescribed --" - def __init__(self, config, fname): + def __init__(self, config, fname, xforms): """Initialize internal variables""" self.fname = fname self.config = config + self.xforms = xforms # Initial state for the state machines self.state = state.NORMAL @@ -883,7 +749,7 @@ class KernelDoc: # Go through the list of members applying all of our transformations. # members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) + members = self.xforms.apply("struct", members) nested = NestedMatch() for search, sub in struct_nested_prefixes: @@ -1009,8 +875,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in var_xforms: - proto = r.sub(sub, proto) + proto = self.xforms.apply("var", proto) proto = proto.rstrip() @@ -1091,7 +956,7 @@ class KernelDoc: # # Apply the initial transformations. # - prototype = apply_transforms(function_xforms, prototype) + prototype = self.xforms.apply("func", prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py new file mode 100644 index 000000000000..e6e0302e5dd0 --- /dev/null +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab . + +import re + +from kdoc.kdoc_re import KernRe + +struct_args_pattern = r'([^,)]+)' + +class CTransforms: + """ + Data class containing a long set of transformations to turn + structure member prefixes, and macro invocations and variables + into something we can parse and generate kdoc for. + """ + + #: Transforms for structs and unions. + struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*__private', re.S), ' '), + (KernRe(r'\s*__rcu', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parentheses will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), + ] + + #: Transforms for function prototypes. + function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__exit +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__always_unused *"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), + ] + + #: Transforms for variable prototypes. + var_xforms = [ + (KernRe(r"__read_mostly"), ""), + (KernRe(r"__ro_after_init"), ""), + (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + ] + + #: Transforms main dictionary used at apply_transforms(). + xforms = { + "struct": struct_xforms, + "func": function_xforms, + "var": var_xforms, + } + + def apply(self, xforms_type, text): + """ + Apply a set of transforms to a block of text. + """ + if xforms_type not in self.xforms: + return text + + for search, subst in self.xforms[xforms_type]: + text = search.sub(subst, text) + return text -- cgit v1.2.3 From 4ff59bdd93f0e80b5014977502d082c778f96304 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 2 Mar 2026 17:40:56 +0100 Subject: docs: xforms_lists: ignore context analysis and lock attributes Drop context analysis and lock (tracking) attributes to avoid kernel-doc warnings. There are now lots of warnings like these: Documentation/core-api/kref:328: ../include/linux/kref.h:72: WARNING: Invalid C declaration: Expected end of definition. [error at 96] int kref_put_mutex (struct kref *kref, void (*release)(struct kref *kref), struct mutex *mutex) __cond_acquires(true# mutex) ------------------------------------------------------------------------------------------------^ Documentation/core-api/kref:328: ../include/linux/kref.h:94: WARNING: Invalid C declaration: Expected end of definition. [error at 92] int kref_put_lock (struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) __cond_acquires(true# lock) --------------------------------------------------------------------------------------------^ The regex is suggested by Mauro; mine was too greedy. Thanks. Updated context analysis and lock macros list provided by PeterZ. Thanks. [mchehab: modified to be applied after xforms_lists split] Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/all/20260107161548.45530e1c@canb.auug.org.au/ Signed-off-by: Randy Dunlap Reviewed-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <3c7fdfc364a8920f92530b47bdbf4bb29a40371f.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 10 ++++++++++ tools/lib/python/kdoc/xforms_lists.py | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index d7daf658e9d2..503a18212747 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -75,6 +75,16 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' # is allowed. # struct_nested_prefixes = [ + (re.compile(r"__cond_acquires\s*\("), ""), + (re.compile(r"__cond_releases\s*\("), ""), + (re.compile(r"__acquires\s*\("), ""), + (re.compile(r"__releases\s*\("), ""), + (re.compile(r"__must_hold\s*\("), ""), + (re.compile(r"__must_not_hold\s*\("), ""), + (re.compile(r"__must_hold_shared\s*\("), ""), + (re.compile(r"__cond_acquires_shared\s*\("), ""), + (re.compile(r"__acquires_shared\s*\("), ""), + (re.compile(r"__releases_shared\s*\("), ""), (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index e6e0302e5dd0..1bda7c4634c3 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -22,6 +22,8 @@ class CTransforms: (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), (KernRe(r'\s*__packed\s*', re.S), ' '), (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), (KernRe(r'\s*__private', re.S), ' '), @@ -120,6 +122,7 @@ class CTransforms: (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__no_context_analysis\s*"), ""), (KernRe(r"__attribute_const__ +"), ""), (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), ] @@ -128,6 +131,8 @@ class CTransforms: var_xforms = [ (KernRe(r"__read_mostly"), ""), (KernRe(r"__ro_after_init"), ""), + (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), + (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), -- cgit v1.2.3 From 134468b0e2043efec4bd25dc6bcef238358a8111 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:57 +0100 Subject: docs: kdoc_re: handle strings and escape chars on NextMatch The logic inside NestedMatch currently doesn't consider that function arguments may have chars and strings, which may eventually contain delimiters. Add logic to handle strings and escape characters on them. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_re.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 664c04c8cc9f..0a7f12616f9f 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -216,6 +216,8 @@ class NestedMatch: for match_re in regex.finditer(line): start = match_re.start() offset = match_re.end() + string_char = None + escape = False d = line[offset - 1] if d not in self.DELIMITER_PAIRS: @@ -229,6 +231,22 @@ class NestedMatch: d = line[pos] + if escape: + escape = False + continue + + if string_char: + if d == '\\': + escape = True + elif d == string_char: + string_char = None + + continue + + if d in ('"', "'"): + string_char = d + continue + if d in self.DELIMITER_PAIRS: end = self.DELIMITER_PAIRS[d] -- cgit v1.2.3 From 962bdc440df58008e0319d6cbe08c4ca1193c112 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:58 +0100 Subject: docs: kdoc_re: don't recompile NestedMatch regex every time Store delimiters and its regex-compiled version as const vars. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <0cf2b72d4785aa8b727188b56688ff442d1c65ce.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_re.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 0a7f12616f9f..00afa5bccd6d 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -99,6 +99,13 @@ class KernRe: self.last_match = self.regex.search(string) return self.last_match + def finditer(self, string): + """ + Alias to re.finditer. + """ + + return self.regex.finditer(string) + def findall(self, string): """ Alias to re.findall. @@ -134,6 +141,16 @@ class KernRe: return self.last_match.groups() +#: Nested delimited pairs (brackets and parenthesis) +DELIMITER_PAIRS = { + '{': '}', + '(': ')', + '[': ']', +} + +#: compiled delimiters +RE_DELIM = KernRe(r'[\{\}\[\]\(\)]') + class NestedMatch: """ @@ -183,14 +200,6 @@ class NestedMatch: # # FOO(arg1, arg2, arg3) - DELIMITER_PAIRS = { - '{': '}', - '(': ')', - '[': ']', - } - - RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') - def _search(self, regex, line): """ Finds paired blocks for a regex that ends with a delimiter. @@ -220,13 +229,13 @@ class NestedMatch: escape = False d = line[offset - 1] - if d not in self.DELIMITER_PAIRS: + if d not in DELIMITER_PAIRS: continue - end = self.DELIMITER_PAIRS[d] + end = DELIMITER_PAIRS[d] stack.append(end) - for match in self.RE_DELIM.finditer(line[offset:]): + for match in RE_DELIM.finditer(line[offset:]): pos = match.start() + offset d = line[pos] @@ -247,8 +256,8 @@ class NestedMatch: string_char = d continue - if d in self.DELIMITER_PAIRS: - end = self.DELIMITER_PAIRS[d] + if d in DELIMITER_PAIRS: + end = DELIMITER_PAIRS[d] stack.append(end) continue -- cgit v1.2.3 From 34503b5fd10d8c7f1b1f4fecb6aae826fcf79424 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:40:59 +0100 Subject: docs: kdoc_re: Change NestedMath args replacement to \0 Future patches will allow parsing each argument instead of the hole set. Prepare for it by changing the replace all args from \1 to \0. No functional changes. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <46e383118be9d9e432e3814fe819ebb12261d7b4.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- tools/lib/python/kdoc/kdoc_re.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 503a18212747..0f90c16cb51a 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -85,7 +85,7 @@ struct_nested_prefixes = [ (re.compile(r"__cond_acquires_shared\s*\("), ""), (re.compile(r"__acquires_shared\s*\("), ""), (re.compile(r"__releases_shared\s*\("), ""), - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), + (re.compile(r'\bSTRUCT_GROUP\('), r'\0'), ] # diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 00afa5bccd6d..ea4f6f3d9e42 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -188,7 +188,7 @@ class NestedMatch: # except that the content inside the match group is delimiter-aligned. # # The content inside parentheses is converted into a single replace - # group (e.g. r`\1'). + # group (e.g. r`\0'). # # It would be nice to change such definition to support multiple # match groups, allowing a regex equivalent to: @@ -291,7 +291,7 @@ class NestedMatch: if the sub argument contains:: - r'\1' + r'\0' it will work just like re: it places there the matched paired data with the delimiter stripped. @@ -310,9 +310,9 @@ class NestedMatch: # Value, ignoring start/end delimiters value = line[end:pos - 1] - # replaces \1 at the sub string, if \1 is used there + # replaces \0 at the sub string, if \0 is used there new_sub = sub - new_sub = new_sub.replace(r'\1', value) + new_sub = new_sub.replace(r'\0', value) out += new_sub -- cgit v1.2.3 From fc44c0a0b2a72f2e9331063a311a548634ae18af Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:41:00 +0100 Subject: docs: kdoc_re: make NestedMatch use KernRe Instead of using re_compile, let's create the class with the regex and use KernRe to keep it cached. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_parser.py | 25 ++++++++++++------------- tools/lib/python/kdoc/kdoc_re.py | 24 +++++++++++++++++------- 2 files changed, 29 insertions(+), 20 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 0f90c16cb51a..cd9857906a2b 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -75,17 +75,17 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' # is allowed. # struct_nested_prefixes = [ - (re.compile(r"__cond_acquires\s*\("), ""), - (re.compile(r"__cond_releases\s*\("), ""), - (re.compile(r"__acquires\s*\("), ""), - (re.compile(r"__releases\s*\("), ""), - (re.compile(r"__must_hold\s*\("), ""), - (re.compile(r"__must_not_hold\s*\("), ""), - (re.compile(r"__must_hold_shared\s*\("), ""), - (re.compile(r"__cond_acquires_shared\s*\("), ""), - (re.compile(r"__acquires_shared\s*\("), ""), - (re.compile(r"__releases_shared\s*\("), ""), - (re.compile(r'\bSTRUCT_GROUP\('), r'\0'), + (NestedMatch(r"__cond_acquires\s*\("), ""), + (NestedMatch(r"__cond_releases\s*\("), ""), + (NestedMatch(r"__acquires\s*\("), ""), + (NestedMatch(r"__releases\s*\("), ""), + (NestedMatch(r"__must_hold\s*\("), ""), + (NestedMatch(r"__must_not_hold\s*\("), ""), + (NestedMatch(r"__must_hold_shared\s*\("), ""), + (NestedMatch(r"__cond_acquires_shared\s*\("), ""), + (NestedMatch(r"__acquires_shared\s*\("), ""), + (NestedMatch(r"__releases_shared\s*\("), ""), + (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), ] # @@ -761,9 +761,8 @@ class KernelDoc: members = trim_private_members(members) members = self.xforms.apply("struct", members) - nested = NestedMatch() for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) + members = search.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. # diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index ea4f6f3d9e42..085b89a4547c 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -200,7 +200,10 @@ class NestedMatch: # # FOO(arg1, arg2, arg3) - def _search(self, regex, line): + def __init__(self, regex): + self.regex = KernRe(regex) + + def _search(self, line): """ Finds paired blocks for a regex that ends with a delimiter. @@ -222,7 +225,7 @@ class NestedMatch: stack = [] - for match_re in regex.finditer(line): + for match_re in self.regex.finditer(line): start = match_re.start() offset = match_re.end() string_char = None @@ -270,7 +273,7 @@ class NestedMatch: yield start, offset, pos + 1 break - def search(self, regex, line): + def search(self, line): """ This is similar to re.search: @@ -278,12 +281,12 @@ class NestedMatch: returning occurrences only if all delimiters are paired. """ - for t in self._search(regex, line): + for t in self._search(line): yield line[t[0]:t[2]] - def sub(self, regex, sub, line, count=0): - r""" + def sub(self, sub, line, count=0): + """ This is similar to re.sub: It matches a regex that it is followed by a delimiter, @@ -304,7 +307,7 @@ class NestedMatch: cur_pos = 0 n = 0 - for start, end, pos in self._search(regex, line): + for start, end, pos in self._search(line): out += line[cur_pos:start] # Value, ignoring start/end delimiters @@ -331,3 +334,10 @@ class NestedMatch: out += line[cur_pos:l] return out + + def __repr__(self): + """ + Returns a displayable version of the class init. + """ + + return f'NestedMatch("{self.regex.regex.pattern}")' -- cgit v1.2.3 From 85c2a51357f720fabfb6fa8d2551d87a94e797cb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 2 Mar 2026 17:41:01 +0100 Subject: docs: kdoc_parser: move nested match transforms to xforms_lists.py As NestedMatch now has a sub method and a declaration close to what KernRe does, we can move the rules to xforms_lists and simplify kdoc_parser a little bit. No functional changes. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <762ce2a58ff024c1b0b6f6a6e05020d1415b8308.1772469446.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 21 --------------------- tools/lib/python/kdoc/xforms_lists.py | 14 +++++++++++++- 2 files changed, 13 insertions(+), 22 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index cd9857906a2b..edf70ba139a5 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -69,25 +69,6 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' r'(?:[-:].*)?$', # description (not captured) cache = False) -# -# Regexes here are guaranteed to have the end delimiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (NestedMatch(r"__cond_acquires\s*\("), ""), - (NestedMatch(r"__cond_releases\s*\("), ""), - (NestedMatch(r"__acquires\s*\("), ""), - (NestedMatch(r"__releases\s*\("), ""), - (NestedMatch(r"__must_hold\s*\("), ""), - (NestedMatch(r"__must_not_hold\s*\("), ""), - (NestedMatch(r"__must_hold_shared\s*\("), ""), - (NestedMatch(r"__cond_acquires_shared\s*\("), ""), - (NestedMatch(r"__acquires_shared\s*\("), ""), - (NestedMatch(r"__releases_shared\s*\("), ""), - (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), -] - # # Ancillary functions # @@ -761,8 +742,6 @@ class KernelDoc: members = trim_private_members(members) members = self.xforms.apply("struct", members) - for search, sub in struct_nested_prefixes: - members = search.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 1bda7c4634c3..c07cbe1e6349 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -4,7 +4,7 @@ import re -from kdoc.kdoc_re import KernRe +from kdoc.kdoc_re import KernRe, NestedMatch struct_args_pattern = r'([^,)]+)' @@ -94,6 +94,18 @@ class CTransforms: (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), + + (NestedMatch(r"__cond_acquires\s*\("), ""), + (NestedMatch(r"__cond_releases\s*\("), ""), + (NestedMatch(r"__acquires\s*\("), ""), + (NestedMatch(r"__releases\s*\("), ""), + (NestedMatch(r"__must_hold\s*\("), ""), + (NestedMatch(r"__must_not_hold\s*\("), ""), + (NestedMatch(r"__must_hold_shared\s*\("), ""), + (NestedMatch(r"__cond_acquires_shared\s*\("), ""), + (NestedMatch(r"__acquires_shared\s*\("), ""), + (NestedMatch(r"__releases_shared\s*\("), ""), + (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), ] #: Transforms for function prototypes. -- cgit v1.2.3 From 861dcdb6ad6f339a5958764352e626e2af7df4c1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:25:14 +0100 Subject: docs: kdoc_files: allows the caller to use a different xforms class While the main goal for kernel-doc is to be used inside the Linux Kernel, other open source projects could benefit for it. That's currently the case of QEMU, which has a fork, mainly due to two reasons: - they need an extra C function transform rule; - they handle the html output a little bit different. Add an extra optional argument to make easier for the code to be shared, as, with that, QEMU can just create a new derivated class that will contain its specific rulesets, and just copy the remaining kernel-doc files as-is. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <6b274ddbdcd9d438c6848e00e410a2f65ef80ec2.1772810574.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_files.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 33618c6abec2..c35e033cf123 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -118,7 +118,7 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname, CTransforms()) + doc = KernelDoc(self.config, fname, self.xforms) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table @@ -154,7 +154,7 @@ class KernelFiles(): self.error(f"Cannot find file {fname}") - def __init__(self, verbose=False, out_style=None, + def __init__(self, verbose=False, out_style=None, xforms=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, logger=None): @@ -193,6 +193,11 @@ class KernelFiles(): self.config.wshort_desc = wshort_desc self.config.wcontents_before_sections = wcontents_before_sections + if xforms: + self.xforms = xforms + else: + self.xforms = CTransforms() + if not logger: self.config.log = logging.getLogger("kernel-doc") else: -- cgit v1.2.3 From 0d3ab0e4bbfd688bfaef66b6365a71c70a0f0450 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:25:15 +0100 Subject: docs: kdoc_files: document KernelFiles() ABI The KernelFiles is the main entry point to run kernel-doc, being used by both tools/docs/kernel-doc and Documentation/sphinx/kerneldoc.py. It is also used on QEMU, which also uses the kernel-doc libraries from tools/lib/python/kdoc. Properly describe its ABI contract. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <13eb44c341232564eaf2a9c9de4782369fef57e1.1772810574.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_files.py | 44 ++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index c35e033cf123..8c2059623949 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -91,7 +91,49 @@ class KernelFiles(): """ Parse kernel-doc tags on multiple kernel source files. - There are two type of parsers defined here: + This is the main entry point to run kernel-doc. This class is initialized + using a series of optional arguments: + + ``verbose`` + If True, enables kernel-doc verbosity. Default: False. + + ``out_style`` + Class to be used to format output. If None (default), + only report errors. + + ``xforms`` + Transforms to be applied to C prototypes and data structs. + If not specified, defaults to xforms = CFunction() + + ``werror`` + If True, treat warnings as errors, retuning an error code on warnings. + + Default: False. + + ``wreturn`` + If True, warns about the lack of a return markup on functions. + + Default: False. + ``wshort_desc`` + If True, warns if initial short description is missing. + + Default: False. + + ``wcontents_before_sections`` + If True, warn if there are contents before sections (deprecated). + This option is kept just for backward-compatibility, but it does + nothing, neither here nor at the original Perl script. + + Default: False. + + ``logger`` + Optional logger class instance. + + If not specified, defaults to use: ``logging.getLogger("kernel-doc")`` + + Note: + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and ``EXPORT_SYMBOL*`` macros; - self.process_export_file(): parses only ``EXPORT_SYMBOL*`` macros. -- cgit v1.2.3 From c1873e77434db2c592cfd21dd7d2e34a5c18304f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:41 +0100 Subject: docs: kdoc_output: use a method to emit the .TH header All man emit functions need to add a .TH header. Move the code to a common function, as we'll be addressing some issues at the common code. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <2e55fcfe8724fde08a78635a1a3f8b449a6adf82.1772810752.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 4210b91dde5f..fb6b90c54c8a 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -607,7 +607,20 @@ class ManFormat(OutputFormat): "%m %d %Y", ] - def __init__(self, modulename): + def emit_th(self, name, modulename = None, manual=None): + """Emit a title header line.""" + name = name.strip() + + if not manual: + manual = self.manual + + if not modulename: + modulename = self.modulename + + self.data += f'.TH "{modulename}" {self.section} "{name}" ' + self.data += f'"{self.date}" "{manual}" LINUX\n' + + def __init__(self, modulename, section="9", manual="API Manual"): """ Creates class variables. @@ -616,7 +629,11 @@ class ManFormat(OutputFormat): """ super().__init__() + self.modulename = modulename + self.section = section + self.manual = manual + self.symbols = [] dt = None @@ -632,7 +649,7 @@ class ManFormat(OutputFormat): if not dt: dt = datetime.now() - self.man_date = dt.strftime("%B %Y") + self.date = dt.strftime("%B %Y") def arg_name(self, args, name): """ @@ -724,7 +741,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) for section, text in args.sections.items(): self.data += f'.SH "{section}"' + "\n" @@ -734,7 +751,8 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + self.emit_th(out_name, modulename = name, + manual="Kernel Hacker\'s Manual") self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -780,7 +798,7 @@ class ManFormat(OutputFormat): def out_enum(self, fname, name, args): out_name = self.arg_name(args, name) - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"enum {name} \\- {args['purpose']}\n" @@ -813,7 +831,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) full_proto = args.other_stuff["full_proto"] - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -834,7 +852,7 @@ class ManFormat(OutputFormat): purpose = args.get('purpose') out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"typedef {name} \\- {purpose}\n" @@ -849,7 +867,7 @@ class ManFormat(OutputFormat): definition = args.get('definition') out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"{args.type} {name} \\- {purpose}\n" -- cgit v1.2.3 From 43874045faa72b876da361fed4b3c9aeee09ebdb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:42 +0100 Subject: docs: kdoc_output: remove extra attribute on man .TH headers According with modern documents, groff .TH supports up to 5 arguments, but the logic passes 6. Drop the lastest one ("LINUX"). Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index fb6b90c54c8a..d0b237c09391 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -618,7 +618,7 @@ class ManFormat(OutputFormat): modulename = self.modulename self.data += f'.TH "{modulename}" {self.section} "{name}" ' - self.data += f'"{self.date}" "{manual}" LINUX\n' + self.data += f'"{self.date}" "{manual}"\n' def __init__(self, modulename, section="9", manual="API Manual"): """ -- cgit v1.2.3 From 31938f120fa261b983fed3315239fe1c5fc4e6e7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:43 +0100 Subject: docs: kdoc_output: use a single manual for everything There's no reason why functions will be on a different manual. Unify its name, calling it as "Kernel API Manual". Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <000e1174a551e97ad4710ad4f3750b22017bedd5.1772810752.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index d0b237c09391..24ee1fad681e 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -607,20 +607,17 @@ class ManFormat(OutputFormat): "%m %d %Y", ] - def emit_th(self, name, modulename = None, manual=None): + def emit_th(self, name, modulename = None): """Emit a title header line.""" name = name.strip() - if not manual: - manual = self.manual - if not modulename: modulename = self.modulename self.data += f'.TH "{modulename}" {self.section} "{name}" ' - self.data += f'"{self.date}" "{manual}"\n' + self.data += f'"{self.date}" "{self.manual}"\n' - def __init__(self, modulename, section="9", manual="API Manual"): + def __init__(self, modulename, section="9", manual="Kernel API Manual"): """ Creates class variables. @@ -751,8 +748,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.emit_th(out_name, modulename = name, - manual="Kernel Hacker\'s Manual") + self.emit_th(out_name, modulename = name) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" -- cgit v1.2.3 From 1a63342a2774c734b73841fdfa41cf4d8d58cd94 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:44 +0100 Subject: docs: kdoc_output: don't use a different modulename for functions It doesn't make much sense to have a different modulename just for functions, but not for structs/enums/... Use the same header everywere. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <978259bdf3e8d310c646ecf76ce56d054f6d5738.1772810752.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 24ee1fad681e..62e300e65405 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -607,14 +607,11 @@ class ManFormat(OutputFormat): "%m %d %Y", ] - def emit_th(self, name, modulename = None): + def emit_th(self, name): """Emit a title header line.""" name = name.strip() - if not modulename: - modulename = self.modulename - - self.data += f'.TH "{modulename}" {self.section} "{name}" ' + self.data += f'.TH "{self.modulename}" {self.section} "{name}" ' self.data += f'"{self.date}" "{self.manual}"\n' def __init__(self, modulename, section="9", manual="Kernel API Manual"): @@ -748,7 +745,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.emit_th(out_name, modulename = name) + self.emit_th(out_name) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" -- cgit v1.2.3 From 4160533d058cfa667159e8d6a46fe42c738a4a84 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:45 +0100 Subject: docs: kdoc_output: fix naming for DOC markups Right now, DOC markups aren't being handled properly, as it was using the same name for all output. Fix it by filling the title argument on a different way. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <11d809e5c4bec23240d3ace3f342dbb2a9263446.1772810752.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 38 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 62e300e65405..cf834dbf2725 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -607,14 +607,21 @@ class ManFormat(OutputFormat): "%m %d %Y", ] - def emit_th(self, name): + def modulename(self, args): + if self._modulename: + return self._modulename + + return os.path.dirname(args.fname) + + def emit_th(self, name, args): """Emit a title header line.""" - name = name.strip() + title = name.strip() + module = self.modulename(args) - self.data += f'.TH "{self.modulename}" {self.section} "{name}" ' - self.data += f'"{self.date}" "{self.manual}"\n' + self.data += f'.TH "{name}" {self.section} "{self.date}" ' + self.data += f'"{self.modulename}" "{self.manual}"\n' - def __init__(self, modulename, section="9", manual="Kernel API Manual"): + def __init__(self, modulename=None, section="9", manual="Kernel API Manual"): """ Creates class variables. @@ -624,7 +631,7 @@ class ManFormat(OutputFormat): super().__init__() - self.modulename = modulename + self._modulename = modulename self.section = section self.manual = manual @@ -658,7 +665,8 @@ class ManFormat(OutputFormat): dtype = args.type if dtype == "doc": - return self.modulename + return name +# return os.path.basename(self.modulename(args)) if dtype in ["function", "typedef"]: return name @@ -735,7 +743,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) for section, text in args.sections.items(): self.data += f'.SH "{section}"' + "\n" @@ -745,7 +753,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -791,7 +799,7 @@ class ManFormat(OutputFormat): def out_enum(self, fname, name, args): out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"enum {name} \\- {args['purpose']}\n" @@ -824,7 +832,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) full_proto = args.other_stuff["full_proto"] - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -841,11 +849,11 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_typedef(self, fname, name, args): - module = self.modulename + module = self.modulename(args) purpose = args.get('purpose') out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"typedef {name} \\- {purpose}\n" @@ -855,12 +863,12 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_struct(self, fname, name, args): - module = self.modulename + module = self.modulename(args) purpose = args.get('purpose') definition = args.get('definition') out_name = self.arg_name(args, name) - self.emit_th(out_name) + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{args.type} {name} \\- {purpose}\n" -- cgit v1.2.3 From 26b4fdefc0f96b1e2e25e0482de1476d037ad325 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:46 +0100 Subject: docs: kdoc_output: describe the class init parameters As this class is part of the ABI used by both Sphinx kerneldoc extension and docs/tools/kernel-doc, better describe what parmeters are used to initialize ManOutput class. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <7c57f26150aae11fced259f30898a980b96efb68.1772810752.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index cf834dbf2725..7a181b40810d 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -580,7 +580,34 @@ class RestFormat(OutputFormat): class ManFormat(OutputFormat): - """Consts and functions used by man pages output.""" + """ + Consts and functions used by man pages output. + + This class has one mandatory parameter and some optional ones, which + are needed to define the title header contents: + + ``modulename`` + Defines the module name to be used at the troff ``.TH`` output. + + This argument is mandatory. + + ``section`` + Usually a numeric value from 0 to 9, but man pages also accept + some strings like "p". + + Defauls to ``9`` + + ``manual`` + Defaults to ``Kernel API Manual``. + + The above controls the output of teh corresponding fields on troff + title headers, which will be filled like this:: + + .TH "{name}" {section} "{date}" "{modulename}" "{manual}" + + where ``name``` will match the API symbol name, and ``date`` will be + either the date where the Kernel was compiled or the current date + """ highlights = ( (type_constant, r"\1"), -- cgit v1.2.3 From e4dadcf510da846f32aaaad5d5988890cbf6033d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:47 +0100 Subject: docs: kdoc_output: pick a better default for modulename Instead of placing the same data for modulename for all generated man pages, use the directory from the filename used to produce kernel docs as basis. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <8a5d91c93c0b9b34c2f60e389f4464742804d0d6.1772810752.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 7a181b40810d..c25f80a39cdc 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -589,7 +589,8 @@ class ManFormat(OutputFormat): ``modulename`` Defines the module name to be used at the troff ``.TH`` output. - This argument is mandatory. + This argument is optional. If not specified, it will be filled + with the directory which contains the documented file. ``section`` Usually a numeric value from 0 to 9, but man pages also accept @@ -645,8 +646,8 @@ class ManFormat(OutputFormat): title = name.strip() module = self.modulename(args) - self.data += f'.TH "{name}" {self.section} "{self.date}" ' - self.data += f'"{self.modulename}" "{self.manual}"\n' + self.data += f'.TH "{title}" {self.section} "{self.date}" ' + self.data += f'"{module}" "{self.manual}"\n' def __init__(self, modulename=None, section="9", manual="Kernel API Manual"): """ -- cgit v1.2.3 From cde7c96f88a0fe9ed53e8bb57147b19a725cf097 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:48 +0100 Subject: docs: kdoc_output: Change the logic to handle man highlight The code inside ManFormat code to output man pages is too simple: it produces very bad results when the content has tables or code blocks. Change the way lines are parsed there to allow adding extra logic to handle some special cases. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <6ae2301a40b3fcb4381dd9dda8c75d14f9616b46.1772810752.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index c25f80a39cdc..9caffe0d9753 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -755,15 +755,23 @@ class ManFormat(OutputFormat): if isinstance(contents, list): contents = "\n".join(contents) - for line in contents.strip("\n").split("\n"): - line = KernRe(r"^\s*").sub("", line) - if not line: - continue + lines = contents.strip("\n").split("\n") + i = 0 - if line[0] == ".": - self.data += "\\&" + line + "\n" - else: - self.data += line + "\n" + while i < len(lines): + org_line = lines[i] + + line = KernRe(r"^\s*").sub("", org_line) + + if line: + if line[0] == ".": + self.data += "\\&" + line + "\n" + i += 1 + continue + + i += 1 + + self.data += line + "\n" def out_doc(self, fname, name, args): if not self.check_doc(name, args): -- cgit v1.2.3 From 4ec130cff633361c2217d2ba116ae32772087087 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:49 +0100 Subject: docs: kdoc_output: add a logic to handle tables inside kernel-doc markups specially when DOC is used, it is not uncommon to have tables inside a kernel-doc markup. Add support for simple tables and complex grid tables when output in groff format. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <442ad76442c325044eb9f34a155d5f484341fb35.1772810752.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 130 +++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 9caffe0d9753..7848514a4d22 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -744,6 +744,126 @@ class ManFormat(OutputFormat): return self.data + def emit_table(self, colspec_row, rows): + + if not rows: + return "" + + out = "" + colspec = "\t".join(["l"] * len(rows[0])) + + out += "\n.TS\n" + out += "box;\n" + out += f"{colspec}.\n" + + if colspec_row: + out_row = [] + + for text in colspec_row: + out_row.append(f"\\fB{text}\\fP") + + out += "\t".join(out_row) + "\n_\n" + + for r in rows: + out += "\t".join(r) + "\n" + + out += ".TE\n" + + return out + + def grid_table(self, lines, start): + """ + Ancillary function to help handling a grid table inside the text. + """ + + i = start + 1 + rows = [] + colspec_row = None + + while i < len(lines): + line = lines[i] + + if KernRe(r"^\s*\|.*\|\s*$").match(line): + parts = [] + + for p in line.strip('|').split('|'): + parts.append(p.strip()) + + rows.append(parts) + + elif KernRe(r'^\+\=[\+\=]+\+\s*$').match(line): + if rows and rows[0]: + if not colspec_row: + colspec_row = [""] * len(rows[0]) + + for j in range(0, len(rows[0])): + content = [] + for row in rows: + content.append(row[j]) + + colspec_row[j] = " ".join(content) + + rows = [] + + elif KernRe(r"^\s*\+[-+]+\+.*$").match(line): + pass + + else: + break + + i += 1 + + return i, self.emit_table(colspec_row, rows) + + def simple_table(self, lines, start): + """ + Ancillary function to help handling a simple table inside the text. + """ + + i = start + rows = [] + colspec_row = None + + pos = [] + for m in KernRe(r'\-+').finditer(lines[i]): + pos.append((m.start(), m.end() - 1)) + + i += 1 + while i < len(lines): + line = lines[i] + + if KernRe(r"^\s*[\-]+[ \t\-]+$").match(line): + i += 1 + break + + elif KernRe(r'^[\s=]+$').match(line): + if rows and rows[0]: + if not colspec_row: + colspec_row = [""] * len(rows[0]) + + for j in range(0, len(rows[0])): + content = [] + for row in rows: + content.append(row[j]) + + colspec_row[j] = " ".join(content) + + rows = [] + + else: + row = [""] * len(pos) + + for j in range(0, len(pos)): + start, end = pos[j] + + row[j] = line[start:end].strip() + + rows.append(row) + + i += 1 + + return i, self.emit_table(colspec_row, rows) + def output_highlight(self, block): """ Outputs a C symbol that may require being highlighted with @@ -764,6 +884,16 @@ class ManFormat(OutputFormat): line = KernRe(r"^\s*").sub("", org_line) if line: + if KernRe(r"^\+\-[-+]+\+.*$").match(line): + i, text = self.grid_table(lines, i) + self.data += text + continue + + if KernRe(r"^\-+[ \t]\-[ \t\-]+$").match(line): + i, text = self.simple_table(lines, i) + self.data += text + continue + if line[0] == ".": self.data += "\\&" + line + "\n" i += 1 -- cgit v1.2.3 From 908ae13b1864c05bcde8cfc7127ec147d28f9414 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:50 +0100 Subject: docs: kdoc_output: add support to handle code blocks It is common to have code blocks inside kernel-doc markups. By default, troff will group all lines altogether, producing a very weird output. If a code block is detected by disabling filling inside code blocks, re-enabling it afterwards. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_output.py | 64 ++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 7848514a4d22..df9af444da57 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -864,6 +864,65 @@ class ManFormat(OutputFormat): return i, self.emit_table(colspec_row, rows) + def code_block(self, lines, start): + """ + Ensure that code blocks won't be messed up at the output. + + By default, troff join lines at the same paragraph. Disable it, + on code blocks. + """ + + line = lines[start] + + if "code-block" in line: + out = "\n.nf\n" + elif line.startswith("..") and line.endswith("::"): + # + # Handle note, warning, error, ... markups + # + line = line[2:-1].strip().upper() + out = f"\n.nf\n\\fB{line}\\fP\n" + elif line.endswith("::"): + out = line[:-1] + out += "\n.nf\n" + else: + # Just in case. Should never happen in practice + out = "\n.nf\n" + + i = start + 1 + ident = None + + while i < len(lines): + line = lines[i] + + m = KernRe(r"\S").match(line) + if not m: + out += line + "\n" + i += 1 + continue + + pos = m.start() + if not ident: + if pos > 0: + ident = pos + else: + out += "\n.fi\n" + if i > start + 1: + return i - 1, out + else: + # Just in case. Should never happen in practice + return i, out + + if pos >= ident: + out += line + "\n" + i += 1 + continue + + break + + out += "\n.fi\n" + return i, out + def output_highlight(self, block): """ Outputs a C symbol that may require being highlighted with @@ -894,6 +953,11 @@ class ManFormat(OutputFormat): self.data += text continue + if line.endswith("::") or KernRe(r"\.\.\s+code-block.*::").match(line): + i, text = self.code_block(lines, i) + self.data += text + continue + if line[0] == ".": self.data += "\\&" + line + "\n" i += 1 -- cgit v1.2.3 From ab9150972f21c41d4487e5d4b21cea0ecfe0bb94 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 6 Mar 2026 16:45:51 +0100 Subject: docs: kdoc_output: better handle lists On several functions, the return values are inside a bullet list. Also, on some places, there are numbered lists as well. Use a troff markup to format them, to avoid placing everything on a single line. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_output.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index df9af444da57..08539dd92cbb 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -963,6 +963,14 @@ class ManFormat(OutputFormat): i += 1 continue + # + # Handle lists + # + line = KernRe(r'^[-*]\s+').sub(r'.IP \[bu]\n', line) + line = KernRe(r'^(\d+|a-z)[\.\)]\s+').sub(r'.IP \1\n', line) + else: + line = ".PP\n" + i += 1 self.data += line + "\n" -- cgit v1.2.3 From 9b4e099c221cd118b9dbe720586c1f1c71666d09 Mon Sep 17 00:00:00 2001 From: Ricardo Ungerer Date: Mon, 16 Mar 2026 22:02:17 +0000 Subject: jobserver: Fix typo in docstring This commit fixes small typos in the docstring of jobserver.py. Signed-off-by: Ricardo Ungerer Signed-off-by: Jonathan Corbet Message-ID: <20260316220218.568022-1-ungerer.ricardo@gmail.com> --- tools/lib/python/jobserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py index aba22c33393d..0b1ffdf9f7a3 100755 --- a/tools/lib/python/jobserver.py +++ b/tools/lib/python/jobserver.py @@ -8,14 +8,14 @@ """ Interacts with the POSIX jobserver during the Kernel build time. -A "normal" jobserver task, like the one initiated by a make subrocess would do: +A "normal" jobserver task, like the one initiated by a make subprocess would do: - open read/write file descriptors to communicate with the job server; - ask for one slot by calling:: claim = os.read(reader, 1) - - when the job finshes, call:: + - when the job finishes, call:: os.write(writer, b"+") # os.write(writer, claim) -- cgit v1.2.3 From 023aabb6ccb298add344cab7c00c5f27b10319aa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:21 +0100 Subject: docs: python: add helpers to run unit tests While python internal libraries have support for unit tests, its output is not nice. Add a helper module to improve its output. I wrote this module last year while testing some scripts I used internally. The initial skeleton was generated with the help of LLM tools, but it was higly modified to ensure that it will work as I would expect. Signed-off-by: Mauro Carvalho Chehab Message-ID: <37999041f616ddef41e84cf2686c0264d1a51dc9.1773074166.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/unittest_helper.py | 353 ++++++++++++++++++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100755 tools/lib/python/unittest_helper.py (limited to 'tools/lib/python') diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittest_helper.py new file mode 100755 index 000000000000..55d444cd73d4 --- /dev/null +++ b/tools/lib/python/unittest_helper.py @@ -0,0 +1,353 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025-2026: Mauro Carvalho Chehab . +# +# pylint: disable=C0103,R0912,R0914,E1101 + +""" +Provides helper functions and classes execute python unit tests. + +Those help functions provide a nice colored output summary of each +executed test and, when a test fails, it shows the different in diff +format when running in verbose mode, like:: + + $ tools/unittests/nested_match.py -v + ... + Traceback (most recent call last): + File "/new_devel/docs/tools/unittests/nested_match.py", line 69, in test_count_limit + self.assertEqual(replaced, "bar(a); bar(b); foo(c)") + ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + AssertionError: 'bar(a) foo(b); foo(c)' != 'bar(a); bar(b); foo(c)' + - bar(a) foo(b); foo(c) + ? ^^^^ + + bar(a); bar(b); foo(c) + ? ^^^^^ + ... + +It also allows filtering what tests will be executed via ``-k`` parameter. + +Typical usage is to do:: + + from unittest_helper import run_unittest + ... + + if __name__ == "__main__": + run_unittest(__file__) + +If passing arguments is needed, on a more complex scenario, it can be +used like on this example:: + + from unittest_helper import TestUnits, run_unittest + ... + env = {'sudo': ""} + ... + if __name__ == "__main__": + runner = TestUnits() + base_parser = runner.parse_args() + base_parser.add_argument('--sudo', action='store_true', + help='Enable tests requiring sudo privileges') + + args = base_parser.parse_args() + + # Update module-level flag + if args.sudo: + env['sudo'] = "1" + + # Run tests with customized arguments + runner.run(__file__, parser=base_parser, args=args, env=env) +""" + +import argparse +import atexit +import os +import re +import unittest +import sys + +from unittest.mock import patch + + +class Summary(unittest.TestResult): + """ + Overrides ``unittest.TestResult`` class to provide a nice colored + summary. When in verbose mode, displays actual/expected difference in + unified diff format. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + #: Dictionary to store organized test results. + self.test_results = {} + + #: max length of the test names. + self.max_name_length = 0 + + def startTest(self, test): + super().startTest(test) + test_id = test.id() + parts = test_id.split(".") + + # Extract module, class, and method names + if len(parts) >= 3: + module_name = parts[-3] + else: + module_name = "" + if len(parts) >= 2: + class_name = parts[-2] + else: + class_name = "" + + method_name = parts[-1] + + # Build the hierarchical structure + if module_name not in self.test_results: + self.test_results[module_name] = {} + + if class_name not in self.test_results[module_name]: + self.test_results[module_name][class_name] = [] + + # Track maximum test name length for alignment + display_name = f"{method_name}:" + + self.max_name_length = max(len(display_name), self.max_name_length) + + def _record_test(self, test, status): + test_id = test.id() + parts = test_id.split(".") + if len(parts) >= 3: + module_name = parts[-3] + else: + module_name = "" + if len(parts) >= 2: + class_name = parts[-2] + else: + class_name = "" + method_name = parts[-1] + self.test_results[module_name][class_name].append((method_name, status)) + + def addSuccess(self, test): + super().addSuccess(test) + self._record_test(test, "OK") + + def addFailure(self, test, err): + super().addFailure(test, err) + self._record_test(test, "FAIL") + + def addError(self, test, err): + super().addError(test, err) + self._record_test(test, "ERROR") + + def addSkip(self, test, reason): + super().addSkip(test, reason) + self._record_test(test, f"SKIP ({reason})") + + def printResults(self): + """ + Print results using colors if tty. + """ + # Check for ANSI color support + use_color = sys.stdout.isatty() + COLORS = { + "OK": "\033[32m", # Green + "FAIL": "\033[31m", # Red + "SKIP": "\033[1;33m", # Yellow + "PARTIAL": "\033[33m", # Orange + "EXPECTED_FAIL": "\033[36m", # Cyan + "reset": "\033[0m", # Reset to default terminal color + } + if not use_color: + for c in COLORS: + COLORS[c] = "" + + # Calculate maximum test name length + if not self.test_results: + return + try: + lengths = [] + for module in self.test_results.values(): + for tests in module.values(): + for test_name, _ in tests: + lengths.append(len(test_name) + 1) # +1 for colon + max_length = max(lengths) + 2 # Additional padding + except ValueError: + sys.exit("Test list is empty") + + # Print results + for module_name, classes in self.test_results.items(): + print(f"{module_name}:") + for class_name, tests in classes.items(): + print(f" {class_name}:") + for test_name, status in tests: + # Get base status without reason for SKIP + if status.startswith("SKIP"): + status_code = status.split()[0] + else: + status_code = status + color = COLORS.get(status_code, "") + print( + f" {test_name + ':':<{max_length}}{color}{status}{COLORS['reset']}" + ) + print() + + # Print summary + print(f"\nRan {self.testsRun} tests", end="") + if hasattr(self, "timeTaken"): + print(f" in {self.timeTaken:.3f}s", end="") + print() + + if not self.wasSuccessful(): + print(f"\n{COLORS['FAIL']}FAILED (", end="") + failures = getattr(self, "failures", []) + errors = getattr(self, "errors", []) + if failures: + print(f"failures={len(failures)}", end="") + if errors: + if failures: + print(", ", end="") + print(f"errors={len(errors)}", end="") + print(f"){COLORS['reset']}") + + +def flatten_suite(suite): + """Flatten test suite hierarchy.""" + tests = [] + for item in suite: + if isinstance(item, unittest.TestSuite): + tests.extend(flatten_suite(item)) + else: + tests.append(item) + return tests + + +class TestUnits: + """ + Helper class to set verbosity level. + + This class discover test files, import its unittest classes and + executes the test on it. + """ + def parse_args(self): + """Returns a parser for command line arguments.""" + parser = argparse.ArgumentParser(description="Test runner with regex filtering") + parser.add_argument("-v", "--verbose", action="count", default=1) + parser.add_argument("-f", "--failfast", action="store_true") + parser.add_argument("-k", "--keyword", + help="Regex pattern to filter test methods") + return parser + + def run(self, caller_file=None, pattern=None, + suite=None, parser=None, args=None, env=None): + """ + Execute all tests from the unity test file. + + It contains several optional parameters: + + ``caller_file``: + - name of the file that contains test. + + typical usage is to place __file__ at the caller test, e.g.:: + + if __name__ == "__main__": + TestUnits().run(__file__) + + ``pattern``: + - optional pattern to match multiple file names. Defaults + to basename of ``caller_file``. + + ``suite``: + - an unittest suite initialized by the caller using + ``unittest.TestLoader().discover()``. + + ``parser``: + - an argparse parser. If not defined, this helper will create + one. + + ``args``: + - an ``argparse.Namespace`` data filled by the caller. + + ``env``: + - environment variables that will be passed to the test suite + + At least ``caller_file`` or ``suite`` must be used, otherwise a + ``TypeError`` will be raised. + """ + if not args: + if not parser: + parser = self.parse_args() + args = parser.parse_args() + + if not caller_file and not suite: + raise TypeError("Either caller_file or suite is needed at TestUnits") + + verbose = args.verbose + + if not env: + env = os.environ.copy() + + env["VERBOSE"] = f"{verbose}" + + patcher = patch.dict(os.environ, env) + patcher.start() + # ensure it gets stopped after + atexit.register(patcher.stop) + + + if verbose >= 2: + unittest.TextTestRunner(verbosity=verbose).run = lambda suite: suite + + # Load ONLY tests from the calling file + if not suite: + if not pattern: + pattern = caller_file + + loader = unittest.TestLoader() + suite = loader.discover(start_dir=os.path.dirname(caller_file), + pattern=os.path.basename(caller_file)) + + # Flatten the suite for environment injection + tests_to_inject = flatten_suite(suite) + + # Filter tests by method name if -k specified + if args.keyword: + try: + pattern = re.compile(args.keyword) + filtered_suite = unittest.TestSuite() + for test in tests_to_inject: # Use the pre-flattened list + method_name = test.id().split(".")[-1] + if pattern.search(method_name): + filtered_suite.addTest(test) + suite = filtered_suite + except re.error as e: + sys.stderr.write(f"Invalid regex pattern: {e}\n") + sys.exit(1) + else: + # Maintain original suite structure if no keyword filtering + suite = unittest.TestSuite(tests_to_inject) + + if verbose >= 2: + resultclass = None + else: + resultclass = Summary + + runner = unittest.TextTestRunner(verbosity=args.verbose, + resultclass=resultclass, + failfast=args.failfast) + result = runner.run(suite) + if resultclass: + result.printResults() + + sys.exit(not result.wasSuccessful()) + + +def run_unittest(fname): + """ + Basic usage of TestUnits class. + + Use it when there's no need to pass any extra argument to the tests + with. The recommended way is to place this at the end of each + unittest module:: + + if __name__ == "__main__": + run_unittest(__file__) + """ + TestUnits().run(fname) -- cgit v1.2.3 From b1e64e30fce86e61d3b09f9352b262622f3f0cda Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:23 +0100 Subject: docs: kdoc: don't add broken comments inside prototypes Parsing a file like drivers/scsi/isci/host.h, which contains broken kernel-doc markups makes it create a prototype that contains unmatched end comments. That causes, for instance, struct sci_power_control to be shown this this prototype: struct sci_power_control { * it is not. */ bool timer_started; */ struct sci_timer timer; * requesters field. */ u8 phys_waiting; */ u8 phys_granted_power; * mapped into requesters via struct sci_phy.phy_index */ struct isci_phy *requesters[SCI_MAX_PHYS]; }; as comments won't start with "/*" anymore. Fix the logic to detect such cases, and keep adding the comments inside it. Signed-off-by: Mauro Carvalho Chehab Message-ID: <18e577dbbd538dcc22945ff139fe3638344e14f0.1773074166.git.mchehab+huawei@kernel.org> Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <12ac4a97e2bd5a19d6537122c10098690c38d2c7.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index edf70ba139a5..086579d00b5c 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -1355,6 +1355,12 @@ class KernelDoc: elif doc_content.search(line): self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") self.state = state.PROTO + + # + # Don't let it add partial comments at the code, as breaks the + # logic meant to remove comments from prototypes. + # + self.process_proto_type(ln, "/**\n" + line) # else ... ?? def process_inline_text(self, ln, line): -- cgit v1.2.3 From d5265f7af2d284d5421b763f268157b5fa72f806 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:24 +0100 Subject: docs: kdoc: properly handle empty enum arguments Depending on how the enum proto is written, a comma at the end may incorrectly make kernel-doc parse an arg like " ". Strip spaces before checking if arg is empty. Signed-off-by: Mauro Carvalho Chehab Message-ID: <4182bfb7e5f5b4bbaf05cee1bede691e56247eaf.1773074166.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet Message-ID: <640784283d52c5fc52ea597344ecd567e2fb6e22.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 086579d00b5c..4b3c555e6c8e 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -810,9 +810,10 @@ class KernelDoc: member_set = set() members = KernRe(r'\([^;)]*\)').sub('', members) for arg in members.split(','): - if not arg: - continue arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + if not arg.strip(): + continue + self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed -- cgit v1.2.3 From df50e848f67523195ee0b4c6d2c01823e36a15e7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:25 +0100 Subject: docs: add a C tokenizer to be used by kernel-doc Handling C code purely using regular expressions doesn't work well. Add a C tokenizer to help doing it the right way. The tokenizer was written using as basis the Python re documentation tokenizer example from: https://docs.python.org/3/library/re.html#writing-a-tokenizer Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <39787bb8022e10c65df40c746077f7f66d07ffed.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/c_lex.py | 292 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 tools/lib/python/kdoc/c_lex.py (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py new file mode 100644 index 000000000000..9d726f821f3f --- /dev/null +++ b/tools/lib/python/kdoc/c_lex.py @@ -0,0 +1,292 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. + +Please notice that the code here may rise exceptions to indicate bad +usage inside kdoc to indicate problems at the replace pattern. + +Other errors are logged via log instance. +""" + +import logging +import re + +from .kdoc_re import KernRe + +log = logging.getLogger(__name__) + + +class CToken(): + """ + Data class to define a C token. + """ + + # Tokens that can be used by the parser. Works like an C enum. + + COMMENT = 0 #: A standard C or C99 comment, including delimiter. + STRING = 1 #: A string, including quotation marks. + CHAR = 2 #: A character, including apostophes. + NUMBER = 3 #: A number. + PUNC = 4 #: A puntuation mark: / ``,`` / ``.``. + BEGIN = 5 #: A begin character: ``{`` / ``[`` / ``(``. + END = 6 #: A end character: ``}`` / ``]`` / ``)``. + CPP = 7 #: A preprocessor macro. + HASH = 8 #: The hash character - useful to handle other macros. + OP = 9 #: A C operator (add, subtract, ...). + STRUCT = 10 #: A ``struct`` keyword. + UNION = 11 #: An ``union`` keyword. + ENUM = 12 #: A ``struct`` keyword. + TYPEDEF = 13 #: A ``typedef`` keyword. + NAME = 14 #: A name. Can be an ID or a type. + SPACE = 15 #: Any space characters, including new lines + ENDSTMT = 16 #: End of an statement (``;``). + + BACKREF = 17 #: Not a valid C sequence, but used at sub regex patterns. + + MISMATCH = 255 #: an error indicator: should never happen in practice. + + # Dict to convert from an enum interger into a string. + _name_by_val = {v: k for k, v in dict(vars()).items() if isinstance(v, int)} + + # Dict to convert from string to an enum-like integer value. + _name_to_val = {k: v for v, k in _name_by_val.items()} + + @staticmethod + def to_name(val): + """Convert from an integer value from CToken enum into a string""" + + return CToken._name_by_val.get(val, f"UNKNOWN({val})") + + @staticmethod + def from_name(name): + """Convert a string into a CToken enum value""" + if name in CToken._name_to_val: + return CToken._name_to_val[name] + + return CToken.MISMATCH + + + def __init__(self, kind, value=None, pos=0, + brace_level=0, paren_level=0, bracket_level=0): + self.kind = kind + self.value = value + self.pos = pos + self.level = (bracket_level, paren_level, brace_level) + + def __repr__(self): + name = self.to_name(self.kind) + if isinstance(self.value, str): + value = '"' + self.value + '"' + else: + value = self.value + + return f"CToken(CToken.{name}, {value}, {self.pos}, {self.level})" + +#: Regexes to parse C code, transforming it into tokens. +RE_SCANNER_LIST = [ + # + # Note that \s\S is different than .*, as it also catches \n + # + (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"), + + (CToken.STRING, r'"(?:\\.|[^"\\])*"'), + (CToken.CHAR, r"'(?:\\.|[^'\\])'"), + + (CToken.NUMBER, r"0[xX][\da-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|" + r"\d+(?:\.\d*)?(?:[eE][+-]?\d+)?[fFlL]*"), + + (CToken.ENDSTMT, r"(?:\s+;|;)"), + + (CToken.PUNC, r"[,\.]"), + + (CToken.BEGIN, r"[\[\(\{]"), + + (CToken.END, r"[\]\)\}]"), + + (CToken.CPP, r"#\s*(?:define|include|ifdef|ifndef|if|else|elif|endif|undef|pragma)\b"), + + (CToken.HASH, r"#"), + + (CToken.OP, r"\+\+|\-\-|\->|==|\!=|<=|>=|&&|\|\||<<|>>|\+=|\-=|\*=|/=|%=" + r"|&=|\|=|\^=|[=\+\-\*/%<>&\|\^~!\?\:]"), + + (CToken.STRUCT, r"\bstruct\b"), + (CToken.UNION, r"\bunion\b"), + (CToken.ENUM, r"\benum\b"), + (CToken.TYPEDEF, r"\btypedef\b"), + + (CToken.NAME, r"[A-Za-z_]\w*"), + + (CToken.SPACE, r"\s+"), + + (CToken.BACKREF, r"\\\d+"), + + (CToken.MISMATCH,r"."), +] + +def fill_re_scanner(token_list): + """Ancillary routine to convert RE_SCANNER_LIST into a finditer regex""" + re_tokens = [] + + for kind, pattern in token_list: + name = CToken.to_name(kind) + re_tokens.append(f"(?P<{name}>{pattern})") + + return KernRe("|".join(re_tokens), re.MULTILINE | re.DOTALL) + +#: Handle C continuation lines. +RE_CONT = KernRe(r"\\\n") + +RE_COMMENT_START = KernRe(r'/\*\s*') + +#: tokenizer regex. Will be filled at the first CTokenizer usage. +RE_SCANNER = fill_re_scanner(RE_SCANNER_LIST) + + +class CTokenizer(): + """ + Scan C statements and definitions and produce tokens. + + When converted to string, it drops comments and handle public/private + values, respecting depth. + """ + + # This class is inspired and follows the basic concepts of: + # https://docs.python.org/3/library/re.html#writing-a-tokenizer + + def __init__(self, source=None, log=None): + """ + Create a regular expression to handle RE_SCANNER_LIST. + + While I generally don't like using regex group naming via: + (?P...) + + in this particular case, it makes sense, as we can pick the name + when matching a code via RE_SCANNER. + """ + + self.tokens = [] + + if not source: + return + + if isinstance(source, list): + self.tokens = source + return + + # + # While we could just use _tokenize directly via interator, + # As we'll need to use the tokenizer several times inside kernel-doc + # to handle macro transforms, cache the results on a list, as + # re-using it is cheaper than having to parse everytime. + # + for tok in self._tokenize(source): + self.tokens.append(tok) + + def _tokenize(self, source): + """ + Iterator that parses ``source``, splitting it into tokens, as defined + at ``self.RE_SCANNER_LIST``. + + The interactor returns a CToken class object. + """ + + # Handle continuation lines. Note that kdoc_parser already has a + # logic to do that. Still, let's keep it for completeness, as we might + # end re-using this tokenizer outsize kernel-doc some day - or we may + # eventually remove from there as a future cleanup. + source = RE_CONT.sub("", source) + + brace_level = 0 + paren_level = 0 + bracket_level = 0 + + for match in RE_SCANNER.finditer(source): + kind = CToken.from_name(match.lastgroup) + pos = match.start() + value = match.group() + + if kind == CToken.MISMATCH: + log.error(f"Unexpected token '{value}' on pos {pos}:\n\t'{source}'") + elif kind == CToken.BEGIN: + if value == '(': + paren_level += 1 + elif value == '[': + bracket_level += 1 + else: # value == '{' + brace_level += 1 + + elif kind == CToken.END: + if value == ')' and paren_level > 0: + paren_level -= 1 + elif value == ']' and bracket_level > 0: + bracket_level -= 1 + elif brace_level > 0: # value == '}' + brace_level -= 1 + + yield CToken(kind, value, pos, + brace_level, paren_level, bracket_level) + + def __str__(self): + out="" + show_stack = [True] + + for i, tok in enumerate(self.tokens): + if tok.kind == CToken.BEGIN: + show_stack.append(show_stack[-1]) + + elif tok.kind == CToken.END: + prev = show_stack[-1] + if len(show_stack) > 1: + show_stack.pop() + + if not prev and show_stack[-1]: + # + # Try to preserve indent + # + out += "\t" * (len(show_stack) - 1) + + out += str(tok.value) + continue + + elif tok.kind == CToken.COMMENT: + comment = RE_COMMENT_START.sub("", tok.value) + + if comment.startswith("private:"): + show_stack[-1] = False + show = False + elif comment.startswith("public:"): + show_stack[-1] = True + + continue + + if not show_stack[-1]: + continue + + if i < len(self.tokens) - 1: + next_tok = self.tokens[i + 1] + + # Do some cleanups before ";" + + if (tok.kind == CToken.SPACE and + next_tok.kind == CToken.PUNC and + next_tok.value == ";"): + + continue + + if (tok.kind == CToken.PUNC and + next_tok.kind == CToken.PUNC and + tok.value == ";" and + next_tok.kind == CToken.PUNC and + next_tok.value == ";"): + + continue + + out += str(tok.value) + + return out -- cgit v1.2.3 From cd77a9aa20ef53a03e5bb2630a5e7b16b910f198 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:26 +0100 Subject: docs: kdoc: use tokenizer to handle comments on structs Better handle comments inside structs. After those changes, all unittests now pass: test_private: TestPublicPrivate: test balanced_inner_private: OK test balanced_non_greddy_private: OK test balanced_private: OK test no private: OK test unbalanced_inner_private: OK test unbalanced_private: OK test unbalanced_struct_group_tagged_with_private: OK test unbalanced_two_struct_group_tagged_first_with_private: OK test unbalanced_without_end_of_line: OK Ran 9 tests This also solves a bug when handling STRUCT_GROUP() with a private comment on it: @@ -397134,7 +397134,7 @@ basic V4L2 device-level support. unsigned int max_len; unsigned int offset; struct page_pool_params_slow slow; - STRUCT_GROUP( struct net_device *netdev; + struct net_device *netdev; unsigned int queue_idx; unsigned int flags; }; Signed-off-by: Mauro Carvalho Chehab Message-ID: Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <054763260f7b5459ad0738ed906d7c358d640692.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 4b3c555e6c8e..62d8030cf532 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,6 +13,7 @@ import sys import re from pprint import pformat +from kdoc.c_lex import CTokenizer from kdoc.kdoc_re import NestedMatch, KernRe from kdoc.kdoc_item import KdocItem @@ -84,15 +85,9 @@ def trim_private_members(text): """ Remove ``struct``/``enum`` members that have been marked "private". """ - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. - # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + + tokens = CTokenizer(text) + return str(tokens) class state: """ -- cgit v1.2.3 From f1cf9f7cd66f1f90c4c3beb0885b6f7771e1b419 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:30 +0100 Subject: docs: kdoc: create a CMatch to match nested C blocks The NextMatch code is complex, and will become even more complex if we add there support for arguments. Now that we have a tokenizer, we can use a better solution, easier to be understood. Yet, to improve performance, it is better to make it use a previously tokenized code, changing its ABI. So, reimplement NextMatch using the CTokener class. Once it is done, we can drop NestedMatch. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/c_lex.py | 121 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 111 insertions(+), 10 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py index 9d726f821f3f..5da472734ff7 100644 --- a/tools/lib/python/kdoc/c_lex.py +++ b/tools/lib/python/kdoc/c_lex.py @@ -273,20 +273,121 @@ class CTokenizer(): # Do some cleanups before ";" - if (tok.kind == CToken.SPACE and - next_tok.kind == CToken.PUNC and - next_tok.value == ";"): - + if tok.kind == CToken.SPACE and next_tok.kind == CToken.ENDSTMT: continue - if (tok.kind == CToken.PUNC and - next_tok.kind == CToken.PUNC and - tok.value == ";" and - next_tok.kind == CToken.PUNC and - next_tok.value == ";"): - + if tok.kind == CToken.ENDSTMT and next_tok.kind == tok.kind: continue out += str(tok.value) return out + + +class CMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern:: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parentheses of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + # TODO: add a sub method + + def __init__(self, regex): + self.regex = KernRe(regex) + + def _search(self, tokenizer): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and places them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm should work fine for properly paired lines, but will + silently ignore end delimiters that precede a start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to raise exceptions + to cover such issues. + """ + + start = None + offset = -1 + started = False + + import sys + + stack = [] + + for i, tok in enumerate(tokenizer.tokens): + if start is None: + if tok.kind == CToken.NAME and self.regex.match(tok.value): + start = i + stack.append((start, tok.level)) + started = False + + continue + + if not started and tok.kind == CToken.BEGIN: + started = True + continue + + if tok.kind == CToken.END and tok.level == stack[-1][1]: + start, level = stack.pop() + offset = i + + yield CTokenizer(tokenizer.tokens[start:offset + 1]) + start = None + + # + # If an END zeroing levels is not there, return remaining stuff + # This is meant to solve cases where the caller logic might be + # picking an incomplete block. + # + if start and offset < 0: + print("WARNING: can't find an end", file=sys.stderr) + yield CTokenizer(tokenizer.tokens[start:]) + + def search(self, source): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + if isinstance(source, CTokenizer): + tokenizer = source + is_token = True + else: + tokenizer = CTokenizer(source) + is_token = False + + for new_tokenizer in self._search(tokenizer): + if is_token: + yield new_tokenizer + else: + yield str(new_tokenizer) -- cgit v1.2.3 From 9aaeb817ef4f794d1dbb8736332a64b5dae9521c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:32 +0100 Subject: docs: c_lex: properly implement a sub() method for CMatch Implement a sub() method to do what it is expected, parsing backref arguments like \0, \1, \2, ... Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/c_lex.py | 272 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 259 insertions(+), 13 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py index 5da472734ff7..20e50ff0ecd5 100644 --- a/tools/lib/python/kdoc/c_lex.py +++ b/tools/lib/python/kdoc/c_lex.py @@ -16,6 +16,8 @@ Other errors are logged via log instance. import logging import re +from copy import copy + from .kdoc_re import KernRe log = logging.getLogger(__name__) @@ -284,6 +286,172 @@ class CTokenizer(): return out +class CTokenArgs: + """ + Ancillary class to help using backrefs from sub matches. + + If the highest backref contain a "+" at the last element, + the logic will be greedy, picking all other delims. + + This is needed to parse struct_group macros with end with ``MEMBERS...``. + """ + def __init__(self, sub_str): + self.sub_groups = set() + self.max_group = -1 + self.greedy = None + + for m in KernRe(r'\\(\d+)([+]?)').finditer(sub_str): + group = int(m.group(1)) + if m.group(2) == "+": + if self.greedy and self.greedy != group: + raise ValueError("There are multiple greedy patterns!") + self.greedy = group + + self.sub_groups.add(group) + self.max_group = max(self.max_group, group) + + if self.greedy: + if self.greedy != self.max_group: + raise ValueError("Greedy pattern is not the last one!") + + sub_str = KernRe(r'(\\\d+)[+]').sub(r"\1", sub_str) + + self.sub_str = sub_str + self.sub_tokeninzer = CTokenizer(sub_str) + + def groups(self, new_tokenizer): + """ + Create replacement arguments for backrefs like: + + ``\0``, ``\1``, ``\2``, ...``\n`` + + It also accepts a ``+`` character to the highest backref. When used, + it means in practice to ignore delimins after it, being greedy. + + The logic is smart enough to only go up to the maximum required + argument, even if there are more. + + If there is a backref for an argument above the limit, it will + raise an exception. Please notice that, on C, square brackets + don't have any separator on it. Trying to use ``\1``..``\n`` for + brackets also raise an exception. + """ + + level = (0, 0, 0) + + if self.max_group < 0: + return level, [] + + tokens = new_tokenizer.tokens + + # + # Fill \0 with the full token contents + # + groups_list = [ [] ] + + if 0 in self.sub_groups: + inner_level = 0 + + for i in range(0, len(tokens)): + tok = tokens[i] + + if tok.kind == CToken.BEGIN: + inner_level += 1 + + # + # Discard first begin + # + if not groups_list[0]: + continue + elif tok.kind == CToken.END: + inner_level -= 1 + if inner_level < 0: + break + + if inner_level: + groups_list[0].append(tok) + + if not self.max_group: + return level, groups_list + + delim = None + + # + # Ignore everything before BEGIN. The value of begin gives the + # delimiter to be used for the matches + # + for i in range(0, len(tokens)): + tok = tokens[i] + if tok.kind == CToken.BEGIN: + if tok.value == "{": + delim = ";" + elif tok.value == "(": + delim = "," + else: + self.log.error(fr"Can't handle \1..\n on {sub_str}") + + level = tok.level + break + + pos = 1 + groups_list.append([]) + + inner_level = 0 + for i in range(i + 1, len(tokens)): + tok = tokens[i] + + if tok.kind == CToken.BEGIN: + inner_level += 1 + if tok.kind == CToken.END: + inner_level -= 1 + if inner_level < 0: + break + + if tok.kind in [CToken.PUNC, CToken.ENDSTMT] and delim == tok.value: + pos += 1 + if self.greedy and pos > self.max_group: + pos -= 1 + else: + groups_list.append([]) + + if pos > self.max_group: + break + + continue + + groups_list[pos].append(tok) + + if pos < self.max_group: + log.error(fr"{self.sub_str} groups are up to {pos} instead of {self.max_group}") + + return level, groups_list + + def tokens(self, new_tokenizer): + level, groups = self.groups(new_tokenizer) + + new = CTokenizer() + + for tok in self.sub_tokeninzer.tokens: + if tok.kind == CToken.BACKREF: + group = int(tok.value[1:]) + + for group_tok in groups[group]: + new_tok = copy(group_tok) + + new_level = [0, 0, 0] + + for i in range(0, len(level)): + new_level[i] = new_tok.level[i] + level[i] + + new_tok.level = tuple(new_level) + + new.tokens += [ new_tok ] + else: + new.tokens += [ tok ] + + return new.tokens + + class CMatch: """ Finding nested delimiters is hard with regular expressions. It is @@ -309,10 +477,10 @@ class CMatch: will ignore the search string. """ - # TODO: add a sub method - def __init__(self, regex): - self.regex = KernRe(regex) + def __init__(self, regex, delim="("): + self.regex = KernRe("^" + regex + r"\b") + self.start_delim = delim def _search(self, tokenizer): """ @@ -335,7 +503,6 @@ class CMatch: """ start = None - offset = -1 started = False import sys @@ -351,15 +518,24 @@ class CMatch: continue - if not started and tok.kind == CToken.BEGIN: - started = True - continue + if not started: + if tok.kind == CToken.SPACE: + continue + + if tok.kind == CToken.BEGIN and tok.value == self.start_delim: + started = True + continue + + # Name only token without BEGIN/END + if i > start: + i -= 1 + yield start, i + start = None if tok.kind == CToken.END and tok.level == stack[-1][1]: start, level = stack.pop() - offset = i - yield CTokenizer(tokenizer.tokens[start:offset + 1]) + yield start, i start = None # @@ -367,9 +543,12 @@ class CMatch: # This is meant to solve cases where the caller logic might be # picking an incomplete block. # - if start and offset < 0: - print("WARNING: can't find an end", file=sys.stderr) - yield CTokenizer(tokenizer.tokens[start:]) + if start and stack: + if started: + s = str(tokenizer) + log.warning(f"can't find a final end at {s}") + + yield start, len(tokenizer.tokens) def search(self, source): """ @@ -386,8 +565,75 @@ class CMatch: tokenizer = CTokenizer(source) is_token = False - for new_tokenizer in self._search(tokenizer): + for start, end in self._search(tokenizer): + new_tokenizer = CTokenizer(tokenizer.tokens[start:end + 1]) + if is_token: yield new_tokenizer else: yield str(new_tokenizer) + + def sub(self, sub_str, source, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if the sub argument contains:: + + r'\0' + + it will work just like re: it places there the matched paired data + with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. + """ + if isinstance(source, CTokenizer): + is_token = True + tokenizer = source + else: + is_token = False + tokenizer = CTokenizer(source) + + # Detect if sub_str contains sub arguments + + args_match = CTokenArgs(sub_str) + + new_tokenizer = CTokenizer() + pos = 0 + n = 0 + + # + # NOTE: the code below doesn't consider overlays at sub. + # We may need to add some extra unit tests to check if those + # would cause problems. When replacing by "", this should not + # be a problem, but other transformations could be problematic + # + for start, end in self._search(tokenizer): + new_tokenizer.tokens += tokenizer.tokens[pos:start] + + new = CTokenizer(tokenizer.tokens[start:end + 1]) + + new_tokenizer.tokens += args_match.tokens(new) + + pos = end + 1 + + n += 1 + if count and n >= count: + break + + new_tokenizer.tokens += tokenizer.tokens[pos:] + + if not is_token: + return str(new_tokenizer) + + return new_tokenizer + + def __repr__(self): + """ + Returns a displayable version of the class init. + """ + + return f'CMatch("{self.regex.regex.pattern}")' -- cgit v1.2.3 From 600079fdcf46fafe15b4ccd62804d66e05309cc6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:34 +0100 Subject: docs: kdoc: replace NestedMatch with CMatch Our previous approach to solve nested structs were to use NestedMatch. It works well, but adding support to parse delimiters is very complex. Instead, use CMatch, which uses a C tokenizer, making the code more reliable and simpler. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <900bff66f8093402999f9fe055fbfa3fa33a8d8b.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 2 +- tools/lib/python/kdoc/xforms_lists.py | 31 ++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 62d8030cf532..efd58c88ff31 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -14,7 +14,7 @@ import re from pprint import pformat from kdoc.c_lex import CTokenizer -from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index c07cbe1e6349..7fa7f52cec7b 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -4,7 +4,8 @@ import re -from kdoc.kdoc_re import KernRe, NestedMatch +from kdoc.kdoc_re import KernRe +from kdoc.c_lex import CMatch struct_args_pattern = r'([^,)]+)' @@ -60,7 +61,7 @@ class CTransforms: # # As it doesn't properly match the end parenthesis on some cases. # - # So, a better solution was crafted: there's now a NestedMatch + # So, a better solution was crafted: there's now a CMatch # class that ensures that delimiters after a search are properly # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. @@ -72,9 +73,9 @@ class CTransforms: # # Replace macros # - # TODO: use NestedMatch for FOO($1, $2, ...) matches + # TODO: use CMatch for FOO($1, $2, ...) matches # - # it is better to also move those to the NestedMatch logic, + # it is better to also move those to the CMatch logic, # to ensure that parentheses will be properly matched. # (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), @@ -95,17 +96,17 @@ class CTransforms: (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), - (NestedMatch(r"__cond_acquires\s*\("), ""), - (NestedMatch(r"__cond_releases\s*\("), ""), - (NestedMatch(r"__acquires\s*\("), ""), - (NestedMatch(r"__releases\s*\("), ""), - (NestedMatch(r"__must_hold\s*\("), ""), - (NestedMatch(r"__must_not_hold\s*\("), ""), - (NestedMatch(r"__must_hold_shared\s*\("), ""), - (NestedMatch(r"__cond_acquires_shared\s*\("), ""), - (NestedMatch(r"__acquires_shared\s*\("), ""), - (NestedMatch(r"__releases_shared\s*\("), ""), - (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'), + (CMatch(r"__cond_acquires"), ""), + (CMatch(r"__cond_releases"), ""), + (CMatch(r"__acquires"), ""), + (CMatch(r"__releases"), ""), + (CMatch(r"__must_hold"), ""), + (CMatch(r"__must_not_hold"), ""), + (CMatch(r"__must_hold_shared"), ""), + (CMatch(r"__cond_acquires_shared"), ""), + (CMatch(r"__acquires_shared"), ""), + (CMatch(r"__releases_shared"), ""), + (CMatch(r"STRUCT_GROUP"), r'\0'), ] #: Transforms for function prototypes. -- cgit v1.2.3 From ae63a5b9203bcb3dce4819c07409b27734180eea Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:35 +0100 Subject: docs: kdoc_re: get rid of NestedMatch class Now that everything was converted to CMatch, we can get rid of the previous NestedMatch implementation. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_re.py | 201 --------------------------------------- 1 file changed, 201 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 085b89a4547c..6f3ae28859ea 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -140,204 +140,3 @@ class KernRe: """ return self.last_match.groups() - -#: Nested delimited pairs (brackets and parenthesis) -DELIMITER_PAIRS = { - '{': '}', - '(': ')', - '[': ']', -} - -#: compiled delimiters -RE_DELIM = KernRe(r'[\{\}\[\]\(\)]') - - -class NestedMatch: - """ - Finding nested delimiters is hard with regular expressions. It is - even harder on Python with its normal re module, as there are several - advanced regular expressions that are missing. - - This is the case of this pattern:: - - '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' - - which is used to properly match open/close parentheses of the - string search STRUCT_GROUP(), - - Add a class that counts pairs of delimiters, using it to match and - replace nested expressions. - - The original approach was suggested by: - - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - - Although I re-implemented it to make it more generic and match 3 types - of delimiters. The logic checks if delimiters are paired. If not, it - will ignore the search string. - """ - - # TODO: make NestedMatch handle multiple match groups - # - # Right now, regular expressions to match it are defined only up to - # the start delimiter, e.g.: - # - # \bSTRUCT_GROUP\( - # - # is similar to: STRUCT_GROUP\((.*)\) - # except that the content inside the match group is delimiter-aligned. - # - # The content inside parentheses is converted into a single replace - # group (e.g. r`\0'). - # - # It would be nice to change such definition to support multiple - # match groups, allowing a regex equivalent to: - # - # FOO\((.*), (.*), (.*)\) - # - # it is probably easier to define it not as a regular expression, but - # with some lexical definition like: - # - # FOO(arg1, arg2, arg3) - - def __init__(self, regex): - self.regex = KernRe(regex) - - def _search(self, line): - """ - Finds paired blocks for a regex that ends with a delimiter. - - The suggestion of using finditer to match pairs came from: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - but I ended using a different implementation to align all three types - of delimiters and seek for an initial regular expression. - - The algorithm seeks for open/close paired delimiters and places them - into a stack, yielding a start/stop position of each match when the - stack is zeroed. - - The algorithm should work fine for properly paired lines, but will - silently ignore end delimiters that precede a start delimiter. - This should be OK for kernel-doc parser, as unaligned delimiters - would cause compilation errors. So, we don't need to raise exceptions - to cover such issues. - """ - - stack = [] - - for match_re in self.regex.finditer(line): - start = match_re.start() - offset = match_re.end() - string_char = None - escape = False - - d = line[offset - 1] - if d not in DELIMITER_PAIRS: - continue - - end = DELIMITER_PAIRS[d] - stack.append(end) - - for match in RE_DELIM.finditer(line[offset:]): - pos = match.start() + offset - - d = line[pos] - - if escape: - escape = False - continue - - if string_char: - if d == '\\': - escape = True - elif d == string_char: - string_char = None - - continue - - if d in ('"', "'"): - string_char = d - continue - - if d in DELIMITER_PAIRS: - end = DELIMITER_PAIRS[d] - - stack.append(end) - continue - - # Does the end delimiter match what is expected? - if stack and d == stack[-1]: - stack.pop() - - if not stack: - yield start, offset, pos + 1 - break - - def search(self, line): - """ - This is similar to re.search: - - It matches a regex that it is followed by a delimiter, - returning occurrences only if all delimiters are paired. - """ - - for t in self._search(line): - - yield line[t[0]:t[2]] - - def sub(self, sub, line, count=0): - """ - This is similar to re.sub: - - It matches a regex that it is followed by a delimiter, - replacing occurrences only if all delimiters are paired. - - if the sub argument contains:: - - r'\0' - - it will work just like re: it places there the matched paired data - with the delimiter stripped. - - If count is different than zero, it will replace at most count - items. - """ - out = "" - - cur_pos = 0 - n = 0 - - for start, end, pos in self._search(line): - out += line[cur_pos:start] - - # Value, ignoring start/end delimiters - value = line[end:pos - 1] - - # replaces \0 at the sub string, if \0 is used there - new_sub = sub - new_sub = new_sub.replace(r'\0', value) - - out += new_sub - - # Drop end ';' if any - if pos < len(line) and line[pos] == ';': - pos += 1 - - cur_pos = pos - n += 1 - - if count and count >= n: - break - - # Append the remaining string - l = len(line) - out += line[cur_pos:l] - - return out - - def __repr__(self): - """ - Returns a displayable version of the class init. - """ - - return f'NestedMatch("{self.regex.regex.pattern}")' -- cgit v1.2.3 From f63e6163c7e4f988b2ff35721ffc86b95425293f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:36 +0100 Subject: docs: xforms_lists: handle struct_group directly The previous logic was handling struct_group on two steps. Remove the previous approach, as CMatch can do it the right way on a single step. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/xforms_lists.py | 53 ++++------------------------------- 1 file changed, 6 insertions(+), 47 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 7fa7f52cec7b..98632c50a146 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -32,52 +32,6 @@ class CTransforms: (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), (KernRe(r'\s*____cacheline_aligned', re.S), ' '), (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a CMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use CMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the CMatch logic, - # to ensure that parentheses will be properly matched. - # (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), @@ -106,7 +60,12 @@ class CTransforms: (CMatch(r"__cond_acquires_shared"), ""), (CMatch(r"__acquires_shared"), ""), (CMatch(r"__releases_shared"), ""), - (CMatch(r"STRUCT_GROUP"), r'\0'), + + (CMatch('struct_group'), r'\2'), + (CMatch('struct_group_attr'), r'\3'), + (CMatch('struct_group_tagged'), r'struct \1 \2; \3'), + (CMatch('__struct_group'), r'\4'), + ] #: Transforms for function prototypes. -- cgit v1.2.3 From 2f07ddbd5793df4ec24f727322cc68065feb3568 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:37 +0100 Subject: docs: xforms_lists: better evaluate struct_group macros The previous approach were to unwind nested structs/unions. Now that we have a logic that can handle it well, use it to ensure that struct_group macros will properly reflect the actual struct. Note that the replacemend logic still simplifies the code a little bit, as the basic build block for struct group is: union { \ struct { MEMBERS } ATTRS; \ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ } ATTRS There: - ATTRS is meant to add extra macro attributes like __packed which we already discard, as they aren't relevant to document struct members; - TAG is used only when built with __cplusplus. So, instead, convert them into just: struct { MEMBERS }; Please notice that here, we're using the greedy version of the backrefs, as MEMBERS is actually MEMBERS... on all such macros. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <24bf2c036b08814d9b4aabc27542fd3b2ff54424.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/xforms_lists.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 98632c50a146..2056572852fd 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -61,10 +61,16 @@ class CTransforms: (CMatch(r"__acquires_shared"), ""), (CMatch(r"__releases_shared"), ""), - (CMatch('struct_group'), r'\2'), - (CMatch('struct_group_attr'), r'\3'), - (CMatch('struct_group_tagged'), r'struct \1 \2; \3'), - (CMatch('__struct_group'), r'\4'), + # + # Macro __struct_group() creates an union with an anonymous + # and a non-anonymous struct, depending on the parameters. We only + # need one of those at kernel-doc, as we won't be documenting the same + # members twice. + # + (CMatch('struct_group'), r'struct { \2+ };'), + (CMatch('struct_group_attr'), r'struct { \3+ };'), + (CMatch('struct_group_tagged'), r'struct { \3+ };'), + (CMatch('__struct_group'), r'struct { \4+ };'), ] -- cgit v1.2.3 From 024e200e2a89d71dceff7d1bff8ae77b145726e0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:38 +0100 Subject: docs: c_lex: setup a logger to report tokenizer issues Report file that has issues detected via CMatch and CTokenizer. This is done by setting up a logger that will be overriden by kdoc_parser, when used on it. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Aleksandr Loktionov Signed-off-by: Jonathan Corbet Message-ID: <903ad83ae176196a50444e66177a4f5bcdef5199.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/c_lex.py | 16 ++++++++++++++++ tools/lib/python/kdoc/kdoc_parser.py | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py index 20e50ff0ecd5..b6d58bd470a9 100644 --- a/tools/lib/python/kdoc/c_lex.py +++ b/tools/lib/python/kdoc/c_lex.py @@ -22,6 +22,22 @@ from .kdoc_re import KernRe log = logging.getLogger(__name__) +def tokenizer_set_log(logger, prefix = ""): + """ + Replace the module‑level logger with a LoggerAdapter that + prepends *prefix* to every message. + """ + global log + + class PrefixAdapter(logging.LoggerAdapter): + """ + Ancillary class to set prefix on all message logs. + """ + def process(self, msg, kwargs): + return f"{prefix}{msg}", kwargs + + # Wrap the provided logger in our adapter + log = PrefixAdapter(logger, {"prefix": prefix}) class CToken(): """ diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index efd58c88ff31..f90c6dd0343d 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,7 +13,7 @@ import sys import re from pprint import pformat -from kdoc.c_lex import CTokenizer +from kdoc.c_lex import CTokenizer, tokenizer_set_log from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem @@ -253,6 +253,8 @@ class KernelDoc: self.config = config self.xforms = xforms + tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") + # Initial state for the state machines self.state = state.NORMAL -- cgit v1.2.3 From 12aa7753ff4c5fea405d139bcf67f49bda2c932e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:40 +0100 Subject: docs: kdoc: ensure that comments are dropped before calling split_struct_proto() Changeset 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes") revealed a hidden bug at split_struct_proto(): some comments there may break its capability of properly identifying a struct. Fixing it is as simple as stripping comments before calling it. Fixes: 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f90c6dd0343d..8b2c9d0f0c58 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -723,6 +723,7 @@ class KernelDoc: # # Do the basic parse to get the pieces of the declaration. # + proto = trim_private_members(proto) struct_parts = self.split_struct_proto(proto) if not struct_parts: self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") @@ -763,6 +764,7 @@ class KernelDoc: # Strip preprocessor directives. Note that this depends on the # trailing semicolon we added in process_proto_type(). # + proto = trim_private_members(proto) proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) # # Parse out the name and members of the enum. Typedef form first. @@ -770,7 +772,7 @@ class KernelDoc: r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = trim_private_members(r.group(1)) + members = r.group(1) # # Failing that, look for a straight enum # @@ -778,7 +780,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = trim_private_members(r.group(2)) + members = r.group(2) # # OK, this isn't going to work. # -- cgit v1.2.3 From 79d881beb721d27f679f0dc1cba2d5fe2d7f6d8d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:41 +0100 Subject: docs: kdoc_parser: avoid tokenizing structs everytime Most of the rules inside CTransforms are of the type CMatch. Don't re-parse the source code every time. Doing this doesn't change the output, but makes kdoc almost as fast as before the tokenizer patches: # Before tokenizer patches $ time ./scripts/kernel-doc . -man >original 2>&1 real 0m42.933s user 0m36.523s sys 0m1.145s # After tokenizer patches $ time ./scripts/kernel-doc . -man >before 2>&1 real 1m29.853s user 1m23.974s sys 0m1.237s # After this patch $ time ./scripts/kernel-doc . -man >after 2>&1 real 0m48.579s user 0m45.938s sys 0m0.988s $ diff -s before after Files before and after are identical Manually checked the differences between original and after with: $ diff -U0 -prBw original after|grep -v Warning|grep -v "@@"|less They're due: - whitespace fixes; - struct_group are now better handled; - several badly-generated man pages from broken inline kernel-doc markups are now fixed. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <1cc2a4286ebf7d4b2d03fcaf42a1ba9fa09004b9.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_parser.py | 1 - tools/lib/python/kdoc/xforms_lists.py | 30 ++++++++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 8b2c9d0f0c58..f6c4ee3b18c9 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -737,7 +737,6 @@ class KernelDoc: # # Go through the list of members applying all of our transformations. # - members = trim_private_members(members) members = self.xforms.apply("struct", members) # diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 2056572852fd..5a62d4a450cb 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -5,7 +5,7 @@ import re from kdoc.kdoc_re import KernRe -from kdoc.c_lex import CMatch +from kdoc.c_lex import CMatch, CTokenizer struct_args_pattern = r'([^,)]+)' @@ -16,6 +16,12 @@ class CTransforms: into something we can parse and generate kdoc for. """ + # + # NOTE: + # Due to performance reasons, place CMatch rules before KernRe, + # as this avoids running the C parser every time. + # + #: Transforms for structs and unions. struct_xforms = [ # Strip attributes @@ -124,13 +130,25 @@ class CTransforms: "var": var_xforms, } - def apply(self, xforms_type, text): + def apply(self, xforms_type, source): """ - Apply a set of transforms to a block of text. + Apply a set of transforms to a block of source. + + As tokenizer is used here, this function also remove comments + at the end. """ if xforms_type not in self.xforms: - return text + return source + + if isinstance(source, str): + source = CTokenizer(source) for search, subst in self.xforms[xforms_type]: - text = search.sub(subst, text) - return text + # + # KernRe only accept strings. + # + if isinstance(search, KernRe): + source = str(source) + + source = search.sub(subst, source) + return str(source) -- cgit v1.2.3 From 7538df7a2d7d26428803cf8053476169a6d28659 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2026 19:09:42 +0100 Subject: docs: xforms_lists: use CMatch for all identifiers CMatch is lexically correct and replaces only identifiers, which is exactly where macro transformations happen. Use it to make the output safer and ensure that all arguments will be parsed the right way, even on complex cases. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <86d4a07ff0e054207747fabf38d6bb261b52b5fa.1773770483.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/xforms_lists.py | 159 +++++++++++++++++----------------- 1 file changed, 79 insertions(+), 80 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py index 5a62d4a450cb..f6ea9efb11ae 100644 --- a/tools/lib/python/kdoc/xforms_lists.py +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -7,7 +7,8 @@ import re from kdoc.kdoc_re import KernRe from kdoc.c_lex import CMatch, CTokenizer -struct_args_pattern = r'([^,)]+)' +struct_args_pattern = r"([^,)]+)" + class CTransforms: """ @@ -24,48 +25,40 @@ class CTransforms: #: Transforms for structs and unions. struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '), - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*__private', re.S), ' '), - (KernRe(r'\s*__rcu', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), - (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'), - - (CMatch(r"__cond_acquires"), ""), - (CMatch(r"__cond_releases"), ""), - (CMatch(r"__acquires"), ""), - (CMatch(r"__releases"), ""), - (CMatch(r"__must_hold"), ""), - (CMatch(r"__must_not_hold"), ""), - (CMatch(r"__must_hold_shared"), ""), - (CMatch(r"__cond_acquires_shared"), ""), - (CMatch(r"__acquires_shared"), ""), - (CMatch(r"__releases_shared"), ""), + (CMatch("__attribute__"), ""), + (CMatch("__aligned"), ""), + (CMatch("__counted_by"), ""), + (CMatch("__counted_by_(le|be)"), ""), + (CMatch("__guarded_by"), ""), + (CMatch("__pt_guarded_by"), ""), + (CMatch("__packed"), ""), + (CMatch("CRYPTO_MINALIGN_ATTR"), ""), + (CMatch("__private"), ""), + (CMatch("__rcu"), ""), + (CMatch("____cacheline_aligned_in_smp"), ""), + (CMatch("____cacheline_aligned"), ""), + (CMatch("__cacheline_group_(?:begin|end)"), ""), + (CMatch("__ETHTOOL_DECLARE_LINK_MODE_MASK"), r"DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)"), + (CMatch("DECLARE_PHY_INTERFACE_MASK",),r"DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)"), + (CMatch("DECLARE_BITMAP"), r"unsigned long \1[BITS_TO_LONGS(\2)]"), + (CMatch("DECLARE_HASHTABLE"), r"unsigned long \1[1 << ((\2) - 1)]"), + (CMatch("DECLARE_KFIFO"), r"\2 *\1"), + (CMatch("DECLARE_KFIFO_PTR"), r"\2 *\1"), + (CMatch("(?:__)?DECLARE_FLEX_ARRAY"), r"\1 \2[]"), + (CMatch("DEFINE_DMA_UNMAP_ADDR"), r"dma_addr_t \1"), + (CMatch("DEFINE_DMA_UNMAP_LEN"), r"__u32 \1"), + (CMatch("VIRTIO_DECLARE_FEATURES"), r"union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }"), + (CMatch("__cond_acquires"), ""), + (CMatch("__cond_releases"), ""), + (CMatch("__acquires"), ""), + (CMatch("__releases"), ""), + (CMatch("__must_hold"), ""), + (CMatch("__must_not_hold"), ""), + (CMatch("__must_hold_shared"), ""), + (CMatch("__cond_acquires_shared"), ""), + (CMatch("__acquires_shared"), ""), + (CMatch("__releases_shared"), ""), + (CMatch("__attribute__"), ""), # # Macro __struct_group() creates an union with an anonymous @@ -73,51 +66,57 @@ class CTransforms: # need one of those at kernel-doc, as we won't be documenting the same # members twice. # - (CMatch('struct_group'), r'struct { \2+ };'), - (CMatch('struct_group_attr'), r'struct { \3+ };'), - (CMatch('struct_group_tagged'), r'struct { \3+ };'), - (CMatch('__struct_group'), r'struct { \4+ };'), - + (CMatch("struct_group"), r"struct { \2+ };"), + (CMatch("struct_group_attr"), r"struct { \3+ };"), + (CMatch("struct_group_tagged"), r"struct { \3+ };"), + (CMatch("__struct_group"), r"struct { \4+ };"), ] #: Transforms for function prototypes. function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__exit +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__no_context_analysis\s*"), ""), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), + (CMatch("static"), ""), + (CMatch("extern"), ""), + (CMatch("asmlinkage"), ""), + (CMatch("inline"), ""), + (CMatch("__inline__"), ""), + (CMatch("__inline"), ""), + (CMatch("__always_inline"), ""), + (CMatch("noinline"), ""), + (CMatch("__FORTIFY_INLINE"), ""), + (CMatch("__init"), ""), + (CMatch("__init_or_module"), ""), + (CMatch("__exit"), ""), + (CMatch("__deprecated"), ""), + (CMatch("__flatten"), ""), + (CMatch("__meminit"), ""), + (CMatch("__must_check"), ""), + (CMatch("__weak"), ""), + (CMatch("__sched"), ""), + (CMatch("__always_unused"), ""), + (CMatch("__printf"), ""), + (CMatch("__(?:re)?alloc_size"), ""), + (CMatch("__diagnose_as"), ""), + (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"), + (CMatch("__no_context_analysis"), ""), + (CMatch("__attribute_const__"), ""), + (CMatch("__attribute__"), ""), + + # + # HACK: this is similar to process_export() hack. It is meant to + # drop _noproof from function name. See for instance: + # ahash_request_alloc kernel-doc declaration at include/crypto/hash.h. + # + (KernRe("_noprof"), ""), ] #: Transforms for variable prototypes. var_xforms = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""), - (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""), - (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"), + (CMatch("__read_mostly"), ""), + (CMatch("__ro_after_init"), ""), + (CMatch("__guarded_by"), ""), + (CMatch("__pt_guarded_by"), ""), + (CMatch("LIST_HEAD"), r"struct list_head \1"), + (KernRe(r"(?://.*)$"), ""), (KernRe(r"(?:/\*.*\*/)"), ""), (KernRe(r";$"), ""), -- cgit v1.2.3 From b2d231f4a77800661b3fb812d997841a548c6526 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:02 +0100 Subject: docs: kdoc_re: better represent long regular expressions The Sphinx output from autodoc doesn't automatically break long lines, except on spaces. Change KernRe __repr__() to break the pattern on multiple strings, each one with a maximum limit of 60 characters. With that, documentation output for KernRe should now be displayable, even on long strings. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <60c264a9d277fed655b1a62df2195562c8596090.1773823995.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_re.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 6f3ae28859ea..28292efe25a2 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -70,10 +70,15 @@ class KernRe: flags_name = " | ".join(flags) + max_len = 60 + pattern = "" + for pos in range(0, len(self.regex.pattern), max_len): + pattern += '"' + self.regex.pattern[pos:max_len + pos] + '" ' + if flags_name: - return f'KernRe("{self.regex.pattern}", {flags_name})' + return f'KernRe({pattern}, {flags_name})' else: - return f'KernRe("{self.regex.pattern}")' + return f'KernRe({pattern})' def __add__(self, other): """ -- cgit v1.2.3 From 8c0b7c0d3c0e640b3ebb7f1f648ea322e56c227a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:03 +0100 Subject: docs: kdoc: add c_lex to generated documentation Do some fixes at groups() description for it to be parsed by Sphinx and add it to the documentation. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <799178cf30dd4022fdb1d029ba998a458e037b52.1773823995.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/c_lex.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py index b6d58bd470a9..e01b154f458e 100644 --- a/tools/lib/python/kdoc/c_lex.py +++ b/tools/lib/python/kdoc/c_lex.py @@ -336,13 +336,14 @@ class CTokenArgs: self.sub_tokeninzer = CTokenizer(sub_str) def groups(self, new_tokenizer): - """ + r""" Create replacement arguments for backrefs like: - ``\0``, ``\1``, ``\2``, ...``\n`` + ``\0``, ``\1``, ``\2``, ... ``\{number}`` - It also accepts a ``+`` character to the highest backref. When used, - it means in practice to ignore delimins after it, being greedy. + It also accepts a ``+`` character to the highest backref, like + ``\4+``. When used, the backref will be greedy, picking all other + arguments afterwards. The logic is smart enough to only go up to the maximum required argument, even if there are more. -- cgit v1.2.3 From e0ebee442d56c11df023b7c2d32edc3b0765b2f3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:04 +0100 Subject: docs: kdoc_files: use a class to group config parameters Instead of abusing argparse.Namespace, define a class to store configuration parameters and logger. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_files.py | 45 +++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 8c2059623949..1c5cb9e5f0e8 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -9,7 +9,6 @@ Classes for navigating through the files that kernel-doc needs to handle to generate documentation. """ -import argparse import logging import os import re @@ -87,6 +86,28 @@ class GlobSourceFiles: file_not_found_cb(fname) +class KdocConfig(): + """ + Stores all configuration attributes that kdoc_parser and kdoc_output + needs. + """ + def __init__(self, verbose=False, werror=False, wreturn=False, + wshort_desc=False, wcontents_before_sections=False, + logger=None): + + self.verbose = verbose + self.werror = werror + self.wreturn = wreturn + self.wshort_desc = wshort_desc + self.wcontents_before_sections = wcontents_before_sections + + if logger: + self.log = logger + else: + self.log = logging.getLogger(__file__) + + self.warning = self.log.warning + class KernelFiles(): """ Parse kernel-doc tags on multiple kernel source files. @@ -224,29 +245,25 @@ class KernelFiles(): if kdoc_werror: werror = kdoc_werror + if not logger: + logger = logging.getLogger("kernel-doc") + else: + logger = logger + # Some variables are global to the parser logic as a whole as they are # used to send control configuration to KernelDoc class. As such, # those variables are read-only inside the KernelDoc. - self.config = argparse.Namespace + self.config = KdocConfig(verbose, werror, wreturn, wshort_desc, + wcontents_before_sections, logger) - self.config.verbose = verbose - self.config.werror = werror - self.config.wreturn = wreturn - self.config.wshort_desc = wshort_desc - self.config.wcontents_before_sections = wcontents_before_sections + # Override log warning, as we want to count errors + self.config.warning = self.warning if xforms: self.xforms = xforms else: self.xforms = CTransforms() - if not logger: - self.config.log = logging.getLogger("kernel-doc") - else: - self.config.log = logger - - self.config.warning = self.warning - self.config.src_tree = os.environ.get("SRCTREE", None) # Initialize variables that are internal to KernelFiles -- cgit v1.2.3 From 9ab2ca3dd127194a55bd9789c031e800fd19c254 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:05 +0100 Subject: docs: kdoc_files: move output symbols logic to kdoc_output When writing unittests for kdoc_output, it became clear that the logic with handles a series of KdocItem symbols from a single file belons to kdoc_output, and not to kdoc_files. Move the code to it. While here, also ensure that self.config will be placed together with set.out_style. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <4ebc26e37a0b544c50d50b8077760f147fa6a535.1773823995.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_files.py | 18 ++---------------- tools/lib/python/kdoc/kdoc_output.py | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 16 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 1c5cb9e5f0e8..58f4ee08e226 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -269,6 +269,7 @@ class KernelFiles(): # Initialize variables that are internal to KernelFiles self.out_style = out_style + self.out_style.set_config(self.config) self.errors = 0 self.results = {} @@ -311,8 +312,6 @@ class KernelFiles(): returning kernel-doc markups on each interaction. """ - self.out_style.set_config(self.config) - if not filenames: filenames = sorted(self.results.keys()) @@ -336,25 +335,12 @@ class KernelFiles(): function_table, enable_lineno, no_doc_sections) - msg = "" if fname not in self.results: self.config.log.warning("No kernel-doc for file %s", fname) continue symbols = self.results[fname] - self.out_style.set_symbols(symbols) - - for arg in symbols: - m = self.out_msg(fname, arg.name, arg) - - if m is None: - ln = arg.get("ln", 0) - dtype = arg.get('type', "") - - self.config.log.warning("%s:%d Can't handle %s", - fname, ln, dtype) - else: - msg += m + msg = self.out_style.output_symbols(fname, symbols) if msg: yield fname, msg diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 08539dd92cbb..73d71cbeabb5 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -222,6 +222,27 @@ class OutputFormat: return None + def output_symbols(self, fname, symbols): + """ + Handles a set of KdocItem symbols. + """ + self.set_symbols(symbols) + + msg = "" + for arg in symbols: + m = self.msg(fname, arg.name, arg) + + if m is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + else: + msg += m + + return msg + # Virtual methods to be overridden by inherited classes # At the base class, those do nothing. def set_symbols(self, symbols): -- cgit v1.2.3 From 01c41b99c66ff26a102edbc4f9dff9c74692723e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:06 +0100 Subject: docs: kdoc_item: fix initial value for parameterdesc_start_lines Ensure that parameterdesc_start_lines is a dict at init time, as this is how it will be set later on at the parser. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <1b4ea24dd4cd82e6711e9be80168684427d74c30.1773823995.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_item.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index 2b8a93f79716..c0585cdbcbd1 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -22,7 +22,7 @@ class KdocItem: self.sections = {} self.sections_start_lines = {} self.parameterlist = [] - self.parameterdesc_start_lines = [] + self.parameterdesc_start_lines = {} self.parameterdescs = {} self.parametertypes = {} # -- cgit v1.2.3 From 99364ba7f8dca5c1c2d08fe37c5835b86be141f4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:07 +0100 Subject: docs: kdoc_item: add support to generate a KdocItem from a dict When reading the contents on a KdocItem using YAML, the data will be imported into a dict. Add a method to create a new KdocItem from a dict to allow converting such input into a real KdocItem. While here, address an issue that, if the class is initialized with an internal parameter outside the 4 initial arguments, it would end being added inside other_stuff, which breaks initializing it from a dict. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_item.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index c0585cdbcbd1..5f41790efacb 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -25,12 +25,31 @@ class KdocItem: self.parameterdesc_start_lines = {} self.parameterdescs = {} self.parametertypes = {} + + self.warnings = [] + # # Just save everything else into our own dict so that the output # side can grab it directly as before. As we move things into more # structured data, this will, hopefully, fade away. # - self.other_stuff = other_stuff + known_keys = { + 'declaration_start_line', + 'sections', + 'sections_start_lines', + 'parameterlist', + 'parameterdesc_start_lines', + 'parameterdescs', + 'parametertypes', + 'warnings', + } + + self.other_stuff = {} + for k, v in other_stuff.items(): + if k in known_keys: + setattr(self, k, v) # real attribute + else: + self.other_stuff[k] = v def get(self, key, default = None): """ @@ -41,6 +60,20 @@ class KdocItem: def __getitem__(self, key): return self.get(key) + @classmethod + def from_dict(cls, d): + """Create a KdocItem from a plain dict.""" + + cp = d.copy() + name = cp.pop('name', None) + fname = cp.pop('fname', None) + type = cp.pop('type', None) + start_line = cp.pop('start_line', 1) + other_stuff = cp.pop('other_stuff', {}) + + # Everything that’s left goes straight to __init__ + return cls(name, fname, type, start_line, **cp, **other_stuff) + # # Tracking of section and parameter information. # -- cgit v1.2.3 From e394855fcc897f73f23c364a3a596b54cc879e4c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:08 +0100 Subject: docs: kdoc_item: fix a typo on sections_start_lines Currently, there are 15 occurrences of section?_start_lines, with 10 using the plural way. This is an issue, as, while kdoc_output works with KdocItem, the term doesn't match its init value. The variable sections_start_lines stores multiple sections, so placing it in plural is its correct way. So, ensure that, on all parts of kdoc, this will be referred as sections_start_lines. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_item.py | 2 +- tools/lib/python/kdoc/kdoc_output.py | 2 +- tools/lib/python/kdoc/kdoc_parser.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index 5f41790efacb..fe08cac861c2 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -82,7 +82,7 @@ class KdocItem: Set sections and start lines. """ self.sections = sections - self.section_start_lines = start_lines + self.sections_start_lines = start_lines def set_params(self, names, descs, types, starts): """ diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 73d71cbeabb5..1b54117dbe19 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -389,7 +389,7 @@ class RestFormat(OutputFormat): else: self.data += f'{self.lineprefix}**{section}**\n\n' - self.print_lineno(args.section_start_lines.get(section, 0)) + self.print_lineno(args.sections_start_lines.get(section, 0)) self.output_highlight(text) self.data += "\n" self.data += "\n" diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f6c4ee3b18c9..35658a7e72d5 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -140,7 +140,7 @@ class KernelEntry: self.parametertypes = {} self.parameterdesc_start_lines = {} - self.section_start_lines = {} + self.sections_start_lines = {} self.sections = {} self.anon_struct_union = False @@ -220,7 +220,7 @@ class KernelEntry: self.sections[name] += '\n' + contents else: self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line + self.sections_start_lines[name] = self.new_start_line self.new_start_line = 0 # self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) @@ -316,7 +316,7 @@ class KernelDoc: for section in ["Description", "Return"]: if section in sections and not sections[section].rstrip(): del sections[section] - item.set_sections(sections, self.entry.section_start_lines) + item.set_sections(sections, self.entry.sections_start_lines) item.set_params(self.entry.parameterlist, self.entry.parameterdescs, self.entry.parametertypes, self.entry.parameterdesc_start_lines) -- cgit v1.2.3 From b37b3cbbb1f1a99bc8b95d9f00fcf887c27f4770 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:13 +0100 Subject: docs: add a new file to write kernel-doc output to a YAML file Storing kernel-doc output is helpful to allow debugging problems on it and to preparate unit tests. Add a class to store such contents at the same format as defined at kdoc-test-schema.yaml. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <5d084ca1a91f6a620534a1135d1b8183d934319a.1773823995.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_yaml_file.py | 155 ++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 tools/lib/python/kdoc/kdoc_yaml_file.py (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py new file mode 100644 index 000000000000..db131503c3f6 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_yaml_file.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab . + +import os + +from kdoc.kdoc_output import ManFormat, RestFormat + + +class KDocTestFile(): + """ + Handles the logic needed to store kernel‑doc output inside a YAML file. + Useful for unit tests and regression tests. + """ + + def __init__(self, config, yaml_file, yaml_content): + # + # Bail out early if yaml is not available + # + try: + import yaml + except ImportError: + sys.exit("Warning: yaml package not available. Aborting it.") + + self.config = config + self.test_file = os.path.expanduser(yaml_file) + self.yaml_content = yaml_content + + self.tests = [] + + out_dir = os.path.dirname(self.test_file) + if out_dir and not os.path.isdir(out_dir): + sys.exit(f"Directory {out_dir} doesn't exist.") + + self.out_style = [] + + if "man" in self.yaml_content: + out_style = ManFormat() + out_style.set_config(self.config) + + self.out_style.append(out_style) + + if "rst" in self.yaml_content: + out_style = RestFormat() + out_style.set_config(self.config) + + self.out_style.append(out_style) + + def set_filter(self, export, internal, symbol, nosymbol, + function_table, enable_lineno, no_doc_sections): + """ + Set filters at the output classes. + """ + for out_style in self.out_style: + out_style.set_filter(export, internal, symbol, + nosymbol, function_table, + enable_lineno, no_doc_sections) + + @staticmethod + def get_kdoc_item(arg, start_line=1): + + d = vars(arg) + + declaration_start_line = d.get("declaration_start_line") + if not declaration_start_line: + return d + + d["declaration_start_line"] = start_line + + parameterdesc_start_lines = d.get("parameterdesc_start_lines") + if parameterdesc_start_lines: + for key in parameterdesc_start_lines: + ln = parameterdesc_start_lines[key] + ln += start_line - declaration_start_line + + parameterdesc_start_lines[key] = ln + + sections_start_lines = d.get("sections_start_lines") + if sections_start_lines: + for key in sections_start_lines: + ln = sections_start_lines[key] + ln += start_line - declaration_start_line + + sections_start_lines[key] = ln + + return d + + def output_symbols(self, fname, symbols, source): + """ + Store source, symbols and output strings at self.tests. + """ + + # + # KdocItem needs to be converted into dicts + # + kdoc_item = [] + expected = [] + + if not symbols and not source: + return + + if not source or len(symbols) != len(source): + print(f"Warning: lengths are different. Ignoring {fname}") + + # Folding without line numbers is too hard. + # The right thing to do here to proceed would be to delete + # not-handled source blocks, as len(source) should be bigger + # than len(symbols) + return + + base_name = "test_" + fname.replace(".", "_").replace("/", "_") + expected_dict = {} + start_line=1 + + for i in range(0, len(symbols)): + arg = symbols[i] + + if "KdocItem" in self.yaml_content: + msg = self.get_kdoc_item(arg) + + expected_dict["kdoc_item"] = msg + + for out_style in self.out_style: + if isinstance(out_style, ManFormat): + key = "man" + else: + key = "rst" + + expected_dict[key]= out_style.output_symbols(fname, [arg]) + + name = f"{base_name}_{i:03d}" + + test = { + "name": name, + "description": f"{fname} line {source[i]["ln"]}", + "fname": fname, + "source": source[i]["data"], + "expected": [expected_dict] + } + + self.tests.append(test) + + expected_dict = {} + + def write(self): + """ + Output the content of self.tests to self.test_file. + """ + import yaml + + data = {"tests": self.tests} + + with open(self.test_file, "w", encoding="utf-8") as fp: + yaml.safe_dump(data, fp, sort_keys=False, default_style="|", + default_flow_style=False, allow_unicode=True) -- cgit v1.2.3 From 01d6d7bf9672f1aeabbffaa3fbfb8017223ff878 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 18 Mar 2026 10:11:14 +0100 Subject: docs: kernel-doc: add support to store output on a YAML file Add a command line parameter and library support to optionally store: - KdocItem intermediate format after parsing; - man pages output; - rst output. inside a YAML file. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_files.py | 47 +++++++++++++++++++++++++++++++----- tools/lib/python/kdoc/kdoc_parser.py | 27 ++++++++++++++++++++- 2 files changed, 67 insertions(+), 7 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 58f4ee08e226..5a299ed44d62 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -16,6 +16,7 @@ import re from kdoc.kdoc_parser import KernelDoc from kdoc.xforms_lists import CTransforms from kdoc.kdoc_output import OutputFormat +from kdoc.kdoc_yaml_file import KDocTestFile class GlobSourceFiles: @@ -152,6 +153,12 @@ class KernelFiles(): If not specified, defaults to use: ``logging.getLogger("kernel-doc")`` + ``yaml_file`` + If defined, stores the output inside a YAML file. + + ``yaml_content`` + Defines what will be inside the YAML file. + Note: There are two type of parsers defined here: @@ -181,7 +188,12 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname, self.xforms) + if self.test_file: + store_src = True + else: + store_src = False + + doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table @@ -191,6 +203,10 @@ class KernelFiles(): self.results[fname] = entries + source = doc.get_source() + if source: + self.source[fname] = source + def process_export_file(self, fname): """ Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. @@ -220,7 +236,7 @@ class KernelFiles(): def __init__(self, verbose=False, out_style=None, xforms=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, - logger=None): + yaml_file=None, yaml_content=None, logger=None): """ Initialize startup variables and parse all files. """ @@ -259,6 +275,11 @@ class KernelFiles(): # Override log warning, as we want to count errors self.config.warning = self.warning + if yaml_file: + self.test_file = KDocTestFile(self.config, yaml_file, yaml_content) + else: + self.test_file = None + if xforms: self.xforms = xforms else: @@ -273,6 +294,7 @@ class KernelFiles(): self.errors = 0 self.results = {} + self.source = {} self.files = set() self.export_files = set() @@ -331,16 +353,29 @@ class KernelFiles(): for s in symbol: function_table.add(s) - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - if fname not in self.results: self.config.log.warning("No kernel-doc for file %s", fname) continue symbols = self.results[fname] + if self.test_file: + self.test_file.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + + self.test_file.output_symbols(fname, symbols, + self.source.get(fname)) + + continue + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + msg = self.out_style.output_symbols(fname, symbols) if msg: yield fname, msg + + if self.test_file: + self.test_file.write() diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index 35658a7e72d5..a10e64589d76 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -246,12 +246,13 @@ class KernelDoc: #: String to write when a parameter is not described. undescribed = "-- undescribed --" - def __init__(self, config, fname, xforms): + def __init__(self, config, fname, xforms, store_src=False): """Initialize internal variables""" self.fname = fname self.config = config self.xforms = xforms + self.store_src = store_src tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") @@ -264,6 +265,9 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] + # When store_src is true, the kernel-doc source content is stored here + self.source = None + # # We need Python 3.7 for its "dicts remember the insertion # order" guarantee @@ -1592,6 +1596,15 @@ class KernelDoc: state.DOCBLOCK: process_docblock, } + def get_source(self): + """ + Return the file content of the lines handled by kernel-doc at the + latest parse_kdoc() run. + + Returns none if KernelDoc() was not initialized with store_src, + """ + return self.source + def parse_kdoc(self): """ Open and process each line of a C source file. @@ -1605,6 +1618,8 @@ class KernelDoc: prev = "" prev_ln = None export_table = set() + self.source = [] + self.state = state.NORMAL try: with open(self.fname, "r", encoding="utf8", @@ -1631,6 +1646,8 @@ class KernelDoc: ln, state.name[self.state], line) + prev_state = self.state + # This is an optimization over the original script. # There, when export_file was used for the same file, # it was read twice. Here, we use the already-existing @@ -1641,6 +1658,14 @@ class KernelDoc: # Hand this line to the appropriate state handler self.state_actions[self.state](self, ln, line) + if self.store_src and prev_state != self.state or self.state != state.NORMAL: + if self.state == state.NAME: + # A "/**" was detected. Add a new source element + self.source.append({"ln": ln, "data": line + "\n"}) + else: + # Append to the existing one + self.source[-1]["data"] += line + "\n" + self.emit_unused_warnings() except OSError: -- cgit v1.2.3 From 6e0d7b63676b85490bbaf01c9a8ebcd692bed981 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 23 Mar 2026 10:10:47 +0100 Subject: docs: kdoc_yaml_file: add a representer to make strings look nicer The strings representation is not ok, currently. Add a helper function to improve it, and drop blank lines at beginning and at the end of the dumps Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <422041a8b49b2609de5749092fe074b7948c32a6.1774256269.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_yaml_file.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py index db131503c3f6..18737abb1176 100644 --- a/tools/lib/python/kdoc/kdoc_yaml_file.py +++ b/tools/lib/python/kdoc/kdoc_yaml_file.py @@ -126,7 +126,7 @@ class KDocTestFile(): else: key = "rst" - expected_dict[key]= out_style.output_symbols(fname, [arg]) + expected_dict[key]= out_style.output_symbols(fname, [arg]).strip() name = f"{base_name}_{i:03d}" @@ -148,8 +148,20 @@ class KDocTestFile(): """ import yaml + # Helper function to better handle multilines + def str_presenter(dumper, data): + if "\n" in data: + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + # Register the representer + yaml.add_representer(str, str_presenter) + data = {"tests": self.tests} with open(self.test_file, "w", encoding="utf-8") as fp: - yaml.safe_dump(data, fp, sort_keys=False, default_style="|", - default_flow_style=False, allow_unicode=True) + yaml.dump(data, fp, + sort_keys=False, width=120, indent=2, + default_flow_style=False, allow_unicode=True, + explicit_start=False, explicit_end=False) -- cgit v1.2.3 From 8326e4a21838593fe67b5d79ba6d0dc8e962ebb9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 23 Mar 2026 10:10:49 +0100 Subject: docs: kdoc_output: fix handling of simple tables Fix check for simple table delimiters. ReST simple tables use "=" instead of "-". I ended testing it with a table modified from a complex one, using "--- --- ---", instead of searching for a real Kernel example. Only noticed when adding an unit test and seek for an actual example from kernel-doc markups. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_output.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 1b54117dbe19..2bfcd356654b 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -846,14 +846,14 @@ class ManFormat(OutputFormat): colspec_row = None pos = [] - for m in KernRe(r'\-+').finditer(lines[i]): + for m in KernRe(r'\=+').finditer(lines[i]): pos.append((m.start(), m.end() - 1)) i += 1 while i < len(lines): line = lines[i] - if KernRe(r"^\s*[\-]+[ \t\-]+$").match(line): + if KernRe(r"^\s*[\=]+[ \t\=]+$").match(line): i += 1 break @@ -969,7 +969,7 @@ class ManFormat(OutputFormat): self.data += text continue - if KernRe(r"^\-+[ \t]\-[ \t\-]+$").match(line): + if KernRe(r"^\=+[ \t]\=[ \t\=]+$").match(line): i, text = self.simple_table(lines, i) self.data += text continue -- cgit v1.2.3 From 99ec67a9984fdf38c7ed78695aeb1b99cfee5b50 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 23 Mar 2026 10:10:50 +0100 Subject: docs: kdoc: better handle source when producing YAML output The current logic was storing symbols source code on a list, not linked to the actual KdocItem. While this works fine when kernel-doc markups are OK, on places where there is a "/**" without a valid kernel-doc markup, it ends that the 1:1 match between source code and KdocItem doesn't happen, causing problems to generate the YAML output. Fix it by storing the source code directly into the KdocItem structure. This shouldn't affect performance or memory footprint, except when --yaml option is used. While here, add a __repr__() function for KdocItem, as it helps debugging it. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <77902dafabb5c3250486aa2dc1568d5fafa95c5b.1774256269.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_files.py | 8 +-- tools/lib/python/kdoc/kdoc_item.py | 6 +- tools/lib/python/kdoc/kdoc_parser.py | 100 ++++++++++++++++---------------- tools/lib/python/kdoc/kdoc_yaml_file.py | 28 ++++----- 4 files changed, 70 insertions(+), 72 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 5a299ed44d62..2428cfc4e843 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -203,10 +203,6 @@ class KernelFiles(): self.results[fname] = entries - source = doc.get_source() - if source: - self.source[fname] = source - def process_export_file(self, fname): """ Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. @@ -294,7 +290,6 @@ class KernelFiles(): self.errors = 0 self.results = {} - self.source = {} self.files = set() self.export_files = set() @@ -364,8 +359,7 @@ class KernelFiles(): function_table, enable_lineno, no_doc_sections) - self.test_file.output_symbols(fname, symbols, - self.source.get(fname)) + self.test_file.output_symbols(fname, symbols) continue diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index fe08cac861c2..a7aa6e1e4c1c 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -14,7 +14,8 @@ class KdocItem: then pass into the output modules. """ - def __init__(self, name, fname, type, start_line, **other_stuff): + def __init__(self, name, fname, type, start_line, + **other_stuff): self.name = name self.fname = fname self.type = type @@ -60,6 +61,9 @@ class KdocItem: def __getitem__(self, key): return self.get(key) + def __repr__(self): + return f"KdocItem({self.name}, {self.fname}, {self.type}, {self.declaration_start_line})" + @classmethod def from_dict(cls, d): """Create a KdocItem from a plain dict.""" diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index a10e64589d76..74af7ae47aa4 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -265,9 +265,6 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - # When store_src is true, the kernel-doc source content is stored here - self.source = None - # # We need Python 3.7 for its "dicts remember the insertion # order" guarantee @@ -720,13 +717,14 @@ class KernelDoc: return declaration - def dump_struct(self, ln, proto): + def dump_struct(self, ln, proto, source): """ Store an entry for a ``struct`` or ``union`` """ # # Do the basic parse to get the pieces of the declaration. # + source = source proto = trim_private_members(proto) struct_parts = self.split_struct_proto(proto) if not struct_parts: @@ -756,10 +754,11 @@ class KernelDoc: declaration_name) self.check_sections(ln, declaration_name, decl_type) self.output_declaration(decl_type, declaration_name, + source=source, definition=self.format_struct_decl(declaration), purpose=self.entry.declaration_purpose) - def dump_enum(self, ln, proto): + def dump_enum(self, ln, proto, source): """ Store an ``enum`` inside self.entries array. """ @@ -767,6 +766,7 @@ class KernelDoc: # Strip preprocessor directives. Note that this depends on the # trailing semicolon we added in process_proto_type(). # + source = source proto = trim_private_members(proto) proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) # @@ -831,9 +831,10 @@ class KernelDoc: f"Excess enum value '@{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, + source=source, purpose=self.entry.declaration_purpose) - def dump_var(self, ln, proto): + def dump_var(self, ln, proto, source): """ Store variables that are part of kAPI. """ @@ -846,6 +847,7 @@ class KernelDoc: # # Store the full prototype before modifying it # + source = source full_proto = proto declaration_name = None @@ -895,32 +897,34 @@ class KernelDoc: default_val = default_val.lstrip("=").strip() self.output_declaration("var", declaration_name, + source=source, full_proto=full_proto, default_val=default_val, purpose=self.entry.declaration_purpose) - def dump_declaration(self, ln, prototype): + def dump_declaration(self, ln, prototype, source): """ Store a data declaration inside self.entries array. """ if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) + self.dump_enum(ln, prototype, source) elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) + self.dump_typedef(ln, prototype, source) elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) + self.dump_struct(ln, prototype, source) elif self.entry.decl_type == "var": - self.dump_var(ln, prototype) + self.dump_var(ln, prototype, source) else: # This would be a bug self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - def dump_function(self, ln, prototype): + def dump_function(self, ln, prototype, source): """ Store a function or function macro inside self.entries array. """ + source = source found = func_macro = False return_type = '' decl_type = 'function' @@ -1013,13 +1017,14 @@ class KernelDoc: # Store the result. # self.output_declaration(decl_type, declaration_name, + source=source, typedef=('typedef' in return_type), functiontype=return_type, purpose=self.entry.declaration_purpose, func_macro=func_macro) - def dump_typedef(self, ln, proto): + def dump_typedef(self, ln, proto, source): """ Store a ``typedef`` inside self.entries array. """ @@ -1030,6 +1035,8 @@ class KernelDoc: typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' + source = source + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) @@ -1050,6 +1057,7 @@ class KernelDoc: self.create_parameter_list(ln, 'function', args, ',', declaration_name) self.output_declaration('function', declaration_name, + source=source, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose) @@ -1067,6 +1075,7 @@ class KernelDoc: return self.output_declaration('typedef', declaration_name, + source=source, purpose=self.entry.declaration_purpose) return @@ -1104,7 +1113,7 @@ class KernelDoc: function_set.add(symbol) return True - def process_normal(self, ln, line): + def process_normal(self, ln, line, source): """ STATE_NORMAL: looking for the ``/**`` to begin everything. """ @@ -1118,7 +1127,7 @@ class KernelDoc: # next line is always the function name self.state = state.NAME - def process_name(self, ln, line): + def process_name(self, ln, line, source): """ STATE_NAME: Looking for the "name - description" line """ @@ -1251,7 +1260,7 @@ class KernelDoc: return False - def process_decl(self, ln, line): + def process_decl(self, ln, line, source): """ STATE_DECLARATION: We've seen the beginning of a declaration. """ @@ -1280,7 +1289,7 @@ class KernelDoc: self.emit_msg(ln, f"bad line: {line}") - def process_special(self, ln, line): + def process_special(self, ln, line, source): """ STATE_SPECIAL_SECTION: a section ending with a blank line. """ @@ -1331,7 +1340,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_body(self, ln, line): + def process_body(self, ln, line, source): """ STATE_BODY: the bulk of a kerneldoc comment. """ @@ -1345,7 +1354,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_inline_name(self, ln, line): + def process_inline_name(self, ln, line, source): """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" if doc_inline_sect.search(line): @@ -1363,10 +1372,10 @@ class KernelDoc: # Don't let it add partial comments at the code, as breaks the # logic meant to remove comments from prototypes. # - self.process_proto_type(ln, "/**\n" + line) + self.process_proto_type(ln, "/**\n" + line, source) # else ... ?? - def process_inline_text(self, ln, line): + def process_inline_text(self, ln, line, source): """STATE_INLINE_TEXT: docbook comments within a prototype.""" if doc_inline_end.search(line): @@ -1452,7 +1461,7 @@ class KernelDoc: return proto - def process_proto_function(self, ln, line): + def process_proto_function(self, ln, line, source): """Ancillary routine to process a function prototype.""" # strip C99-style comments to end of line @@ -1494,10 +1503,10 @@ class KernelDoc: # # ... and we're done # - self.dump_function(ln, self.entry.prototype) + self.dump_function(ln, self.entry.prototype, source) self.reset_state(ln) - def process_proto_type(self, ln, line): + def process_proto_type(self, ln, line, source): """ Ancillary routine to process a type. """ @@ -1527,7 +1536,7 @@ class KernelDoc: elif chunk == '}': self.entry.brcount -= 1 elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) + self.dump_declaration(ln, self.entry.prototype, source) self.reset_state(ln) return # @@ -1536,7 +1545,7 @@ class KernelDoc: # self.entry.prototype += ' ' - def process_proto(self, ln, line): + def process_proto(self, ln, line, source): """STATE_PROTO: reading a function/whatever prototype.""" if doc_inline_oneline.search(line): @@ -1548,17 +1557,18 @@ class KernelDoc: self.state = state.INLINE_NAME elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) + self.process_proto_function(ln, line, source) else: - self.process_proto_type(ln, line) + self.process_proto_type(ln, line, source) - def process_docblock(self, ln, line): + def process_docblock(self, ln, line, source): """STATE_DOCBLOCK: within a ``DOC:`` block.""" if doc_end.search(line): self.dump_section() - self.output_declaration("doc", self.entry.identifier) + self.output_declaration("doc", self.entry.identifier, + source=source) self.reset_state(ln) elif doc_content.search(line): @@ -1596,15 +1606,6 @@ class KernelDoc: state.DOCBLOCK: process_docblock, } - def get_source(self): - """ - Return the file content of the lines handled by kernel-doc at the - latest parse_kdoc() run. - - Returns none if KernelDoc() was not initialized with store_src, - """ - return self.source - def parse_kdoc(self): """ Open and process each line of a C source file. @@ -1618,8 +1619,8 @@ class KernelDoc: prev = "" prev_ln = None export_table = set() - self.source = [] self.state = state.NORMAL + source = "" try: with open(self.fname, "r", encoding="utf8", @@ -1646,7 +1647,11 @@ class KernelDoc: ln, state.name[self.state], line) - prev_state = self.state + if self.store_src: + if source and self.state == state.NORMAL: + source = "" + elif self.state != state.NORMAL: + source += line + "\n" # This is an optimization over the original script. # There, when export_file was used for the same file, @@ -1655,16 +1660,11 @@ class KernelDoc: # if (self.state != state.NORMAL) or \ not self.process_export(export_table, line): + prev_state = self.state # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) - - if self.store_src and prev_state != self.state or self.state != state.NORMAL: - if self.state == state.NAME: - # A "/**" was detected. Add a new source element - self.source.append({"ln": ln, "data": line + "\n"}) - else: - # Append to the existing one - self.source[-1]["data"] += line + "\n" + self.state_actions[self.state](self, ln, line, source) + if prev_state == state.NORMAL and self.state != state.NORMAL: + source += line + "\n" self.emit_unused_warnings() diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py index 18737abb1176..1e2ae7c59d70 100644 --- a/tools/lib/python/kdoc/kdoc_yaml_file.py +++ b/tools/lib/python/kdoc/kdoc_yaml_file.py @@ -85,7 +85,7 @@ class KDocTestFile(): return d - def output_symbols(self, fname, symbols, source): + def output_symbols(self, fname, symbols): """ Store source, symbols and output strings at self.tests. """ @@ -96,16 +96,10 @@ class KDocTestFile(): kdoc_item = [] expected = [] - if not symbols and not source: - return - - if not source or len(symbols) != len(source): - print(f"Warning: lengths are different. Ignoring {fname}") - - # Folding without line numbers is too hard. - # The right thing to do here to proceed would be to delete - # not-handled source blocks, as len(source) should be bigger - # than len(symbols) + # + # Source code didn't produce any symbol + # + if not symbols: return base_name = "test_" + fname.replace(".", "_").replace("/", "_") @@ -115,9 +109,15 @@ class KDocTestFile(): for i in range(0, len(symbols)): arg = symbols[i] - if "KdocItem" in self.yaml_content: + source = arg.get("source", "") + + if arg and "KdocItem" in self.yaml_content: msg = self.get_kdoc_item(arg) + other_stuff = msg.get("other_stuff", {}) + if "source" in other_stuff: + del other_stuff["source"] + expected_dict["kdoc_item"] = msg for out_style in self.out_style: @@ -132,9 +132,9 @@ class KDocTestFile(): test = { "name": name, - "description": f"{fname} line {source[i]["ln"]}", + "description": f"{fname} line {arg.declaration_start_line}", "fname": fname, - "source": source[i]["data"], + "source": source, "expected": [expected_dict] } -- cgit v1.2.3 From e786fab2cfcc9ab65adf35d2eab4ca94abe1955f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 23 Mar 2026 10:10:51 +0100 Subject: docs: kdoc_yaml_file: use a better name for the tests Instead of always using a name with a number on it, use the name of the object directly whenever possible. When the name is already used, append a number prefix at the end. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- tools/lib/python/kdoc/kdoc_yaml_file.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py index 1e2ae7c59d70..0be020d50df0 100644 --- a/tools/lib/python/kdoc/kdoc_yaml_file.py +++ b/tools/lib/python/kdoc/kdoc_yaml_file.py @@ -25,6 +25,7 @@ class KDocTestFile(): self.config = config self.test_file = os.path.expanduser(yaml_file) self.yaml_content = yaml_content + self.test_names = set() self.tests = [] @@ -102,13 +103,10 @@ class KDocTestFile(): if not symbols: return - base_name = "test_" + fname.replace(".", "_").replace("/", "_") expected_dict = {} start_line=1 - for i in range(0, len(symbols)): - arg = symbols[i] - + for arg in symbols: source = arg.get("source", "") if arg and "KdocItem" in self.yaml_content: @@ -120,6 +118,21 @@ class KDocTestFile(): expected_dict["kdoc_item"] = msg + base_name = arg.name + if not base_name: + base_name = fname + base_name = base_name.lower().replace(".", "_").replace("/", "_") + + + # Don't add duplicated names + i = 0 + name = base_name + while name in self.test_names: + i += 1 + name = f"{base_name}_{i:03d}" + + self.test_names.add(name) + for out_style in self.out_style: if isinstance(out_style, ManFormat): key = "man" @@ -128,8 +141,6 @@ class KDocTestFile(): expected_dict[key]= out_style.output_symbols(fname, [arg]).strip() - name = f"{base_name}_{i:03d}" - test = { "name": name, "description": f"{fname} line {arg.declaration_start_line}", -- cgit v1.2.3 From 9c3911812b4a719623ea7502b419929eb01b2fc2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 23 Mar 2026 10:10:52 +0100 Subject: docs: kdoc_output: raise an error if full_proto not available for var This is mandatory, but if it is missing, we need to know what symbol had problems. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <0c3d9dd25889784b999efdb354ade48264c0e03c.1774256269.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_output.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 2bfcd356654b..de107ab4a281 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -513,7 +513,9 @@ class RestFormat(OutputFormat): def out_var(self, fname, name, args): oldprefix = self.lineprefix ln = args.declaration_start_line - full_proto = args.other_stuff["full_proto"] + full_proto = args.other_stuff.get("full_proto") + if not full_proto: + raise KeyError(f"Can't find full proto for {name} variable") self.lineprefix = " " -- cgit v1.2.3 From 2ca0b54dca438edb0f6b0eec7913d3cab60ddebf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 23 Mar 2026 10:10:53 +0100 Subject: docs: c_lex.py: store logger on its data By having the logger stored there, any code using CTokenizer can log messages there. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <467979dc18149e4b2a7113c178e0cb07919632f2.1774256269.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/c_lex.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py index e01b154f458e..cb95f5172448 100644 --- a/tools/lib/python/kdoc/c_lex.py +++ b/tools/lib/python/kdoc/c_lex.py @@ -177,7 +177,7 @@ class CTokenizer(): # This class is inspired and follows the basic concepts of: # https://docs.python.org/3/library/re.html#writing-a-tokenizer - def __init__(self, source=None, log=None): + def __init__(self, source=None): """ Create a regular expression to handle RE_SCANNER_LIST. @@ -188,6 +188,12 @@ class CTokenizer(): when matching a code via RE_SCANNER. """ + # + # Store logger to allow parser classes to re-use it + # + global log + self.log = log + self.tokens = [] if not source: -- cgit v1.2.3 From d642acfd597e3ec37138f9a8f5a634845e3612fd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Mar 2026 06:57:48 +0100 Subject: doc tools: better handle KBUILD_VERBOSE As reported by Jacob, there are troubles when KBUILD_VERBOSE is set at the environment. Fix it on both kernel-doc and sphinx-build-wrapper. Reported-by: Jacob Keller Closes: https://lore.kernel.org/linux-doc/9367d899-53af-4d9c-9320-22fc4dbadca5@intel.com/ Signed-off-by: Mauro Carvalho Chehab Tested-by: Jacob Keller Signed-off-by: Jonathan Corbet Message-ID: <7a99788db75630fb14828d612c0fd77c45ec1891.1774591065.git.mchehab+huawei@kernel.org> --- tools/lib/python/kdoc/kdoc_files.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 2428cfc4e843..ed82b6e6ab25 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -238,7 +238,12 @@ class KernelFiles(): """ if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + try: + verbose = bool(int(os.environ.get("KBUILD_VERBOSE", 0))) + except ValueError: + # Handles an eventual case where verbosity is not a number + # like KBUILD_VERBOSE="" + verbose = False if out_style is None: out_style = OutputFormat() -- cgit v1.2.3 From 07f6cb18c5dd627023e0810cfd51203392f55990 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 26 Mar 2026 20:09:42 +0100 Subject: tools: unittest_helper: add a quiet mode On quiet mode, only report errors. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: <27556792ff70e6267ecd19c258149d380db8d423.1774551940.git.mchehab+huawei@kernel.org> --- tools/lib/python/unittest_helper.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'tools/lib/python') diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittest_helper.py index 55d444cd73d4..f3cba5120401 100755 --- a/tools/lib/python/unittest_helper.py +++ b/tools/lib/python/unittest_helper.py @@ -141,7 +141,7 @@ class Summary(unittest.TestResult): super().addSkip(test, reason) self._record_test(test, f"SKIP ({reason})") - def printResults(self): + def printResults(self, verbose): """ Print results using colors if tty. """ @@ -174,10 +174,15 @@ class Summary(unittest.TestResult): # Print results for module_name, classes in self.test_results.items(): - print(f"{module_name}:") + if verbose: + print(f"{module_name}:") for class_name, tests in classes.items(): - print(f" {class_name}:") + if verbose: + print(f" {class_name}:") for test_name, status in tests: + if not verbose and status in [ "OK", "EXPECTED_FAIL" ]: + continue + # Get base status without reason for SKIP if status.startswith("SKIP"): status_code = status.split()[0] @@ -187,7 +192,8 @@ class Summary(unittest.TestResult): print( f" {test_name + ':':<{max_length}}{color}{status}{COLORS['reset']}" ) - print() + if verbose: + print() # Print summary print(f"\nRan {self.testsRun} tests", end="") @@ -230,6 +236,7 @@ class TestUnits: """Returns a parser for command line arguments.""" parser = argparse.ArgumentParser(description="Test runner with regex filtering") parser.add_argument("-v", "--verbose", action="count", default=1) + parser.add_argument("-q", "--quiet", action="store_true") parser.add_argument("-f", "--failfast", action="store_true") parser.add_argument("-k", "--keyword", help="Regex pattern to filter test methods") @@ -279,7 +286,10 @@ class TestUnits: if not caller_file and not suite: raise TypeError("Either caller_file or suite is needed at TestUnits") - verbose = args.verbose + if args.quiet: + verbose = 0 + else: + verbose = args.verbose if not env: env = os.environ.copy() @@ -334,7 +344,7 @@ class TestUnits: failfast=args.failfast) result = runner.run(suite) if resultclass: - result.printResults() + result.printResults(verbose) sys.exit(not result.wasSuccessful()) -- cgit v1.2.3