From 2b144a30a407d29b7e6d24549f5316175115e788 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:44 +0100
Subject: docs: kdoc_re: add support for groups()

Add an equivalent of the re module's match groups() method.
This is useful in debug messages.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <20d1a9c77200e28cc2ff1d6122635c43f8ba6a71.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 0bf9e01cdc57..774dd747ecb0 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -106,6 +106,13 @@ class KernRe:
 
         return self.last_match.group(num)
 
+    def groups(self):
+        """
+        Returns the group results of the last match
+        """
+
+        return self.last_match.groups()
+
 
 class NestedMatch:
     """
-- 
cgit v1.2.3


From 8eb49357ffa229c9b65a002f655c1280dc09769a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:45 +0100
Subject: docs: kdoc_re: don't go past the end of a line

The logic which checks if the line ends with ";" is currently
broken: it may try to read past the buffer.

Fix it by checking before trying to access line[pos].

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <bce51ba0260a053a0ec55a7375d6ed7a7c08026c.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 774dd747ecb0..6c44fcce0415 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -269,7 +269,7 @@ class NestedMatch:
             out += new_sub
 
             # Drop end ';' if any
-            if line[pos] == ';':
+            if pos < len(line) and line[pos] == ';':
                 pos += 1
 
             cur_pos = pos
-- 
cgit v1.2.3


From 77e6e17e9fc4cb4e59ad97de5453bb6f963a5fd4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:46 +0100
Subject: docs: kdoc_parser: move var transformers to the beginning

Functions and structs already have their transform lists
placed at the beginning; move the variable transforms there
as well.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <491b290252a308f381f88353a3bbe9e2bd1f6a62.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index ca00695b47b3..68a5aea9175d 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -192,6 +192,18 @@ function_xforms  = [
     (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
 ]
 
+#
+# Transforms for variable prototypes
+#
+var_xforms = [
+    (KernRe(r"__read_mostly"), ""),
+    (KernRe(r"__ro_after_init"), ""),
+    (KernRe(r"(?://.*)$"), ""),
+    (KernRe(r"(?:/\*.*\*/)"), ""),
+    (KernRe(r";$"), ""),
+    (KernRe(r"=.*"), ""),
+]
+
 #
 # Ancillary functions
 #
@@ -972,15 +984,6 @@ class KernelDoc:
         ]
         OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"
 
-        sub_prefixes = [
-            (KernRe(r"__read_mostly"), ""),
-            (KernRe(r"__ro_after_init"), ""),
-            (KernRe(r"(?://.*)$"), ""),
-            (KernRe(r"(?:/\*.*\*/)"), ""),
-            (KernRe(r";$"), ""),
-            (KernRe(r"=.*"), ""),
-        ]
-
         #
         # Store the full prototype before modifying it
         #
@@ -1004,7 +1007,7 @@ class KernelDoc:
         # Drop comments and macros to have a pure C prototype
         #
         if not declaration_name:
-            for r, sub in sub_prefixes:
+            for r, sub in var_xforms:
                 proto = r.sub(sub, proto)
 
         proto = proto.rstrip()
-- 
cgit v1.2.3


From cca1bbdd72f72a3cf86d90fd6f326fd709ae931f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:47 +0100
Subject: docs: kdoc_parser: don't mangle with function defines

Mangling #defines is not nice, as we may end up removing
the macro names, preventing several macros from being properly
documented.

Also, on defines, we have something like:

	#define foo(a1, a2, a3, ...)			 \
		/* some real implementation */

The prototype part (the first line in this example) won't contain
any macros, so there is no need to apply any regexes to it.

With that, move the apply_transforms() logic to ensure that
it will be called only on functions.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <8f9854c8ca1c794b6a3fe418f7adbc32aa68b432.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 68a5aea9175d..9643ffb7584a 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -163,7 +163,7 @@ struct_nested_prefixes = [
 #
 # Transforms for function prototypes
 #
-function_xforms  = [
+function_xforms = [
     (KernRe(r"^static +"), ""),
     (KernRe(r"^extern +"), ""),
     (KernRe(r"^asmlinkage +"), ""),
@@ -1066,10 +1066,7 @@ class KernelDoc:
         found = func_macro = False
         return_type = ''
         decl_type = 'function'
-        #
-        # Apply the initial transformations.
-        #
-        prototype = apply_transforms(function_xforms, prototype)
+
         #
         # If we have a macro, remove the "#define" at the front.
         #
@@ -1088,6 +1085,11 @@ class KernelDoc:
                 declaration_name = r.group(1)
                 func_macro = True
                 found = True
+        else:
+            #
+            # Apply the initial transformations.
+            #
+            prototype = apply_transforms(function_xforms, prototype)
 
         # Yes, this truly is vile.  We are looking for:
         # 1. Return type (may be nothing if we're looking at a macro)
-- 
cgit v1.2.3


From 4fd349f03dc51bc2f9cd2ea9f6309b0bc2b848ca Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:48 +0100
Subject: docs: kdoc_parser: fix variable regexes to work with size_t

The regular expressions meant to pick variable types are too
naive: they forgot that the type word may contain underscores.

It also means that we need to change the regex which detects
var attributes to handle "const".

Co-developed-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <8230715239929cf9d475ab81ca1df7de65d82d06.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 9643ffb7584a..9c9443281c40 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -981,8 +981,9 @@ class KernelDoc:
         """
         VAR_ATTRIBS = [
             "extern",
+            "const",
         ]
-        OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"
+        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
 
         #
         # Store the full prototype before modifying it
@@ -1018,14 +1019,14 @@ class KernelDoc:
 
         default_val = None
 
-        r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+        r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
         if r.match(proto):
             if not declaration_name:
                 declaration_name = r.group(1)
 
             default_val = r.group(2)
         else:
-            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
         if r.match(proto):
             default_val = r.group(1)
 
-- 
cgit v1.2.3


From 9bbf22b87d866fa1e6a1f9f6376d2ef458b6dcc7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:49 +0100
Subject: docs: kdoc_parser: fix the default_value logic for variables

The indentation is wrong for the second regex, which causes
problems on variables with defaults.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <681f18338abd6ae33cb9c15d72bb31a1cba75a9a.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 9c9443281c40..4bf55244870f 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -1027,9 +1027,9 @@ class KernelDoc:
             default_val = r.group(2)
         else:
             r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
-        if r.match(proto):
-            default_val = r.group(1)
 
+            if r.match(proto):
+                default_val = r.group(1)
         if not declaration_name:
            self.emit_msg(ln,f"{proto}: can't parse variable")
            return
-- 
cgit v1.2.3


From b7dc635459ad5b00f2d482406dbdca3291622ce2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:50 +0100
Subject: docs: kdoc_parser: don't exclude defaults from prototype

If we do that, the defaults won't be parsed.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <cedf2a819846d2f082388e9ba3d95047c35df6fd.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 4bf55244870f..39ff27d421eb 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -201,7 +201,6 @@ var_xforms = [
     (KernRe(r"(?://.*)$"), ""),
     (KernRe(r"(?:/\*.*\*/)"), ""),
     (KernRe(r";$"), ""),
-    (KernRe(r"=.*"), ""),
 ]
 
 #
-- 
cgit v1.2.3


From 6d9c2e9575b8630e17571a77eef8ade84a2a6344 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:51 +0100
Subject: docs: kdoc_parser: fix parser to support multi-word types

The regular expression currently expects the type to be a single
word, but it may be something like "struct foo".

Add support for it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <544c73a9e670b6fef1828bf4f2ba0de7d29d8675.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 39ff27d421eb..22a820d33dc8 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -1018,14 +1018,14 @@ class KernelDoc:
 
         default_val = None
 
-        r= KernRe(OPTIONAL_VAR_ATTR + r"[\w_]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+        r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
         if r.match(proto):
             if not declaration_name:
                 declaration_name = r.group(1)
 
             default_val = r.group(2)
         else:
-            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
 
             if r.match(proto):
                 default_val = r.group(1)
-- 
cgit v1.2.3


From 9bff5121fe22fdd0bb5bd6f744e136ec20bf7b95 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:52 +0100
Subject: docs: kdoc_parser: add support for LIST_HEAD

Convert LIST_HEAD into struct list_head when handling its
prototype.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <8bdfa6ba6002b0a73a83660f0ce7b40e30124552.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 22a820d33dc8..1df869061bf3 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -198,6 +198,7 @@ function_xforms = [
 var_xforms = [
     (KernRe(r"__read_mostly"), ""),
     (KernRe(r"__ro_after_init"), ""),
+    (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
     (KernRe(r"(?://.*)$"), ""),
     (KernRe(r"(?:/\*.*\*/)"), ""),
     (KernRe(r";$"), ""),
-- 
cgit v1.2.3


From 97d4e70bc2c6f75911a9a5e1a75f2de13fde9b6b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 2 Mar 2026 17:40:53 +0100
Subject: docs: kdoc_parser: handle struct member macro
 VIRTIO_DECLARE_FEATURES(name)

Parse the macro VIRTIO_DECLARE_FEATURES(name) and expand it to its
definition. This prevents one build warning:

WARNING: include/linux/virtio.h:188 struct member 'VIRTIO_DECLARE_FEATURES(features' not described in 'virtio_device'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <6f62e1f1210e74906fa50f4e937f66f54813661b.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 1df869061bf3..917e4528bfbf 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -150,6 +150,7 @@ struct_xforms = [
             struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
     (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
     (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
+    (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
 ]
 #
 # Regexes here are guaranteed to have the end delimiter matching
-- 
cgit v1.2.3


From 95a9429cc6d31371575793ab7beb94bf3e7a2f92 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:54 +0100
Subject: docs: kdoc_re: better show KernRe() at documentation

The __repr__() method is used by autodoc to document macro
initialization.

Add a better representation for KernRe objects.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <80d27732368c14125c1b76048a70d8b4aee527ef.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 6c44fcce0415..664c04c8cc9f 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -52,7 +52,28 @@ class KernRe:
         return self.regex.pattern
 
     def __repr__(self):
-        return f're.compile("{self.regex.pattern}")'
+        """
+        Returns a displayable version of the class init.
+        """
+
+        flag_map = {
+            re.IGNORECASE: "re.I",
+            re.MULTILINE: "re.M",
+            re.DOTALL: "re.S",
+            re.VERBOSE: "re.X",
+        }
+
+        flags = []
+        for flag, name in flag_map.items():
+            if self.regex.flags & flag:
+                flags.append(name)
+
+        flags_name = " | ".join(flags)
+
+        if flags_name:
+            return f'KernRe("{self.regex.pattern}", {flags_name})'
+        else:
+            return f'KernRe("{self.regex.pattern}")'
 
     def __add__(self, other):
         """
-- 
cgit v1.2.3


From d842057c4a205084fb3036122c7426963f04e826 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:55 +0100
Subject: docs: kdoc_parser: move transform lists to a separate file

Over time, most of the changes to kernel-doc have been related
to maintaining a list of transforms to convert macros into pure
C code.

Place such transforms in a separate module, to clean up the
parser module.

There is an advantage to that: QEMU also uses our kernel-doc,
but the xforms list there is different. By placing it in a
separate module, we can minimize the differences and make it
easier to keep QEMU in sync with Kernel upstream.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <ccd74b7589e1fff340a74bf8ed16a974532cb54f.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py   |   3 +-
 tools/lib/python/kdoc/kdoc_parser.py  | 145 ++------------------------------
 tools/lib/python/kdoc/xforms_lists.py | 153 ++++++++++++++++++++++++++++++++++
 3 files changed, 160 insertions(+), 141 deletions(-)
 create mode 100644 tools/lib/python/kdoc/xforms_lists.py

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 022487ea2cc6..33618c6abec2 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -15,6 +15,7 @@ import os
 import re
 
 from kdoc.kdoc_parser import KernelDoc
+from kdoc.xforms_lists import CTransforms
 from kdoc.kdoc_output import OutputFormat
 
 
@@ -117,7 +118,7 @@ class KernelFiles():
         if fname in self.files:
             return
 
-        doc = KernelDoc(self.config, fname)
+        doc = KernelDoc(self.config, fname, CTransforms())
         export_table, entries = doc.parse_kdoc()
 
         self.export_table[fname] = export_table
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 917e4528bfbf..d7daf658e9d2 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -69,89 +69,6 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
                         r'(?:[-:].*)?$',		# description (not captured)
                         cache = False)
 
-#
-# Here begins a long set of transformations to turn structure member prefixes
-# and macro invocations into something we can parse and generate kdoc for.
-#
-struct_args_pattern = r'([^,)]+)'
-
-struct_xforms = [
-    # Strip attributes
-    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
-    (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__packed\s*', re.S), ' '),
-    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
-    (KernRe(r'\s*__private', re.S), ' '),
-    (KernRe(r'\s*__rcu', re.S), ' '),
-    (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
-    (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
-    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
-    #
-    # Unwrap struct_group macros based on this definition:
-    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
-    # which has variants like: struct_group(NAME, MEMBERS...)
-    # Only MEMBERS arguments require documentation.
-    #
-    # Parsing them happens on two steps:
-    #
-    # 1. drop struct group arguments that aren't at MEMBERS,
-    #    storing them as STRUCT_GROUP(MEMBERS)
-    #
-    # 2. remove STRUCT_GROUP() ancillary macro.
-    #
-    # The original logic used to remove STRUCT_GROUP() using an
-    # advanced regex:
-    #
-    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
-    #
-    # with two patterns that are incompatible with
-    # Python re module, as it has:
-    #
-    #   - a recursive pattern: (?1)
-    #   - an atomic grouping: (?>...)
-    #
-    # I tried a simpler version: but it didn't work either:
-    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
-    #
-    # As it doesn't properly match the end parenthesis on some cases.
-    #
-    # So, a better solution was crafted: there's now a NestedMatch
-    # class that ensures that delimiters after a search are properly
-    # matched. So, the implementation to drop STRUCT_GROUP() will be
-    # handled in separate.
-    #
-    (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
-    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
-    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
-    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
-    #
-    # Replace macros
-    #
-    # TODO: use NestedMatch for FOO($1, $2, ...) matches
-    #
-    # it is better to also move those to the NestedMatch logic,
-    # to ensure that parentheses will be properly matched.
-    #
-    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
-     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
-    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
-     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
-    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
-            re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
-    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
-            re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
-    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
-            r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
-            struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
-            struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
-    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
-    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
-    (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
-]
 #
 # Regexes here are guaranteed to have the end delimiter matching
 # the start delimiter. Yet, right now, only one replace group
@@ -161,62 +78,10 @@ struct_nested_prefixes = [
     (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
 ]
 
-#
-# Transforms for function prototypes
-#
-function_xforms = [
-    (KernRe(r"^static +"), ""),
-    (KernRe(r"^extern +"), ""),
-    (KernRe(r"^asmlinkage +"), ""),
-    (KernRe(r"^inline +"), ""),
-    (KernRe(r"^__inline__ +"), ""),
-    (KernRe(r"^__inline +"), ""),
-    (KernRe(r"^__always_inline +"), ""),
-    (KernRe(r"^noinline +"), ""),
-    (KernRe(r"^__FORTIFY_INLINE +"), ""),
-    (KernRe(r"__init +"), ""),
-    (KernRe(r"__init_or_module +"), ""),
-    (KernRe(r"__exit +"), ""),
-    (KernRe(r"__deprecated +"), ""),
-    (KernRe(r"__flatten +"), ""),
-    (KernRe(r"__meminit +"), ""),
-    (KernRe(r"__must_check +"), ""),
-    (KernRe(r"__weak +"), ""),
-    (KernRe(r"__sched +"), ""),
-    (KernRe(r"_noprof"), ""),
-    (KernRe(r"__always_unused *"), ""),
-    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
-    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
-    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
-    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
-    (KernRe(r"__attribute_const__ +"), ""),
-    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
-]
-
-#
-# Transforms for variable prototypes
-#
-var_xforms = [
-    (KernRe(r"__read_mostly"), ""),
-    (KernRe(r"__ro_after_init"), ""),
-    (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
-    (KernRe(r"(?://.*)$"), ""),
-    (KernRe(r"(?:/\*.*\*/)"), ""),
-    (KernRe(r";$"), ""),
-]
-
 #
 # Ancillary functions
 #
 
-def apply_transforms(xforms, text):
-    """
-    Apply a set of transforms to a block of text.
-    """
-    for search, subst in xforms:
-        text = search.sub(subst, text)
-    return text
-
 multi_space = KernRe(r'\s\s+')
 def trim_whitespace(s):
     """
@@ -395,11 +260,12 @@ class KernelDoc:
     #: String to write when a parameter is not described.
     undescribed = "-- undescribed --"
 
-    def __init__(self, config, fname):
+    def __init__(self, config, fname, xforms):
         """Initialize internal variables"""
 
         self.fname = fname
         self.config = config
+        self.xforms = xforms
 
         # Initial state for the state machines
         self.state = state.NORMAL
@@ -883,7 +749,7 @@ class KernelDoc:
         # Go through the list of members applying all of our transformations.
         #
         members = trim_private_members(members)
-        members = apply_transforms(struct_xforms, members)
+        members = self.xforms.apply("struct", members)
 
         nested = NestedMatch()
         for search, sub in struct_nested_prefixes:
@@ -1009,8 +875,7 @@ class KernelDoc:
         # Drop comments and macros to have a pure C prototype
         #
         if not declaration_name:
-            for r, sub in var_xforms:
-                proto = r.sub(sub, proto)
+            proto = self.xforms.apply("var", proto)
 
         proto = proto.rstrip()
 
@@ -1091,7 +956,7 @@ class KernelDoc:
             #
             # Apply the initial transformations.
             #
-            prototype = apply_transforms(function_xforms, prototype)
+            prototype = self.xforms.apply("func", prototype)
 
         # Yes, this truly is vile.  We are looking for:
         # 1. Return type (may be nothing if we're looking at a macro)
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
new file mode 100644
index 000000000000..e6e0302e5dd0
--- /dev/null
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+import re
+
+from kdoc.kdoc_re import KernRe
+
+struct_args_pattern = r'([^,)]+)'
+
+class CTransforms:
+    """
+    Data class containing a long set of transformations to turn
+    structure member prefixes, and macro invocations and variables
+    into something we can parse and generate kdoc for.
+    """
+
+    #: Transforms for structs and unions.
+    struct_xforms = [
+        # Strip attributes
+        (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
+        (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
+        (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
+        (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+        (KernRe(r'\s*__packed\s*', re.S), ' '),
+        (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
+        (KernRe(r'\s*__private', re.S), ' '),
+        (KernRe(r'\s*__rcu', re.S), ' '),
+        (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
+        (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
+        (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
+        #
+        # Unwrap struct_group macros based on this definition:
+        # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
+        # which has variants like: struct_group(NAME, MEMBERS...)
+        # Only MEMBERS arguments require documentation.
+        #
+        # Parsing them happens on two steps:
+        #
+        # 1. drop struct group arguments that aren't at MEMBERS,
+        #    storing them as STRUCT_GROUP(MEMBERS)
+        #
+        # 2. remove STRUCT_GROUP() ancillary macro.
+        #
+        # The original logic used to remove STRUCT_GROUP() using an
+        # advanced regex:
+        #
+        #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
+        #
+        # with two patterns that are incompatible with
+        # Python re module, as it has:
+        #
+        #   - a recursive pattern: (?1)
+        #   - an atomic grouping: (?>...)
+        #
+        # I tried a simpler version: but it didn't work either:
+        #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
+        #
+        # As it doesn't properly match the end parenthesis on some cases.
+        #
+        # So, a better solution was crafted: there's now a NestedMatch
+        # class that ensures that delimiters after a search are properly
+        # matched. So, the implementation to drop STRUCT_GROUP() will be
+        # handled in separate.
+        #
+        (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
+        (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
+        (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
+        (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
+        #
+        # Replace macros
+        #
+        # TODO: use NestedMatch for FOO($1, $2, ...) matches
+        #
+        # it is better to also move those to the NestedMatch logic,
+        # to ensure that parentheses will be properly matched.
+        #
+        (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
+        r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
+        (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
+        r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
+        (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+                re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+        (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+                re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
+        (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
+                r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+        (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
+                struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+        (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
+                struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
+        (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
+        (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
+        (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
+    ]
+
+    #: Transforms for function prototypes.
+    function_xforms = [
+        (KernRe(r"^static +"), ""),
+        (KernRe(r"^extern +"), ""),
+        (KernRe(r"^asmlinkage +"), ""),
+        (KernRe(r"^inline +"), ""),
+        (KernRe(r"^__inline__ +"), ""),
+        (KernRe(r"^__inline +"), ""),
+        (KernRe(r"^__always_inline +"), ""),
+        (KernRe(r"^noinline +"), ""),
+        (KernRe(r"^__FORTIFY_INLINE +"), ""),
+        (KernRe(r"__init +"), ""),
+        (KernRe(r"__init_or_module +"), ""),
+        (KernRe(r"__exit +"), ""),
+        (KernRe(r"__deprecated +"), ""),
+        (KernRe(r"__flatten +"), ""),
+        (KernRe(r"__meminit +"), ""),
+        (KernRe(r"__must_check +"), ""),
+        (KernRe(r"__weak +"), ""),
+        (KernRe(r"__sched +"), ""),
+        (KernRe(r"_noprof"), ""),
+        (KernRe(r"__always_unused *"), ""),
+        (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
+        (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
+        (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
+        (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
+        (KernRe(r"__attribute_const__ +"), ""),
+        (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
+    ]
+
+    #: Transforms for variable prototypes.
+    var_xforms = [
+        (KernRe(r"__read_mostly"), ""),
+        (KernRe(r"__ro_after_init"), ""),
+        (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
+        (KernRe(r"(?://.*)$"), ""),
+        (KernRe(r"(?:/\*.*\*/)"), ""),
+        (KernRe(r";$"), ""),
+    ]
+
+    #: Main dictionary of transforms, keyed by type and used by apply().
+    xforms = {
+        "struct": struct_xforms,
+        "func": function_xforms,
+        "var": var_xforms,
+    }
+
+    def apply(self, xforms_type, text):
+        """
+        Apply a set of transforms to a block of text.
+        """
+        if xforms_type not in self.xforms:
+            return text
+
+        for search, subst in self.xforms[xforms_type]:
+            text = search.sub(subst, text)
+        return text
-- 
cgit v1.2.3


From 4ff59bdd93f0e80b5014977502d082c778f96304 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 2 Mar 2026 17:40:56 +0100
Subject: docs: xforms_lists: ignore context analysis and lock attributes

Drop context analysis and lock (tracking) attributes to avoid
kernel-doc warnings.

There are now lots of warnings like these:

    Documentation/core-api/kref:328: ../include/linux/kref.h:72: WARNING: Invalid C declaration: Expected end of definition. [error at 96]
      int kref_put_mutex (struct kref *kref, void (*release)(struct kref *kref), struct mutex *mutex) __cond_acquires(true# mutex)
      ------------------------------------------------------------------------------------------------^
    Documentation/core-api/kref:328: ../include/linux/kref.h:94: WARNING: Invalid C declaration: Expected end of definition. [error at 92]
      int kref_put_lock (struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) __cond_acquires(true# lock)
      --------------------------------------------------------------------------------------------^

The regex is suggested by Mauro; mine was too greedy. Thanks.
Updated context analysis and lock macros list provided by PeterZ. Thanks.

[mchehab: modified to be applied after xforms_lists split]

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/all/20260107161548.45530e1c@canb.auug.org.au/
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <3c7fdfc364a8920f92530b47bdbf4bb29a40371f.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  | 10 ++++++++++
 tools/lib/python/kdoc/xforms_lists.py |  5 +++++
 2 files changed, 15 insertions(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index d7daf658e9d2..503a18212747 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -75,6 +75,16 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
 # is allowed.
 #
 struct_nested_prefixes = [
+    (re.compile(r"__cond_acquires\s*\("), ""),
+    (re.compile(r"__cond_releases\s*\("), ""),
+    (re.compile(r"__acquires\s*\("), ""),
+    (re.compile(r"__releases\s*\("), ""),
+    (re.compile(r"__must_hold\s*\("), ""),
+    (re.compile(r"__must_not_hold\s*\("), ""),
+    (re.compile(r"__must_hold_shared\s*\("), ""),
+    (re.compile(r"__cond_acquires_shared\s*\("), ""),
+    (re.compile(r"__acquires_shared\s*\("), ""),
+    (re.compile(r"__releases_shared\s*\("), ""),
     (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
 ]
 
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index e6e0302e5dd0..1bda7c4634c3 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -22,6 +22,8 @@ class CTransforms:
         (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
         (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
         (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+        (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '),
+        (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '),
         (KernRe(r'\s*__packed\s*', re.S), ' '),
         (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
         (KernRe(r'\s*__private', re.S), ' '),
@@ -120,6 +122,7 @@ class CTransforms:
         (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
         (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
         (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
+        (KernRe(r"__no_context_analysis\s*"), ""),
         (KernRe(r"__attribute_const__ +"), ""),
         (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
     ]
@@ -128,6 +131,8 @@ class CTransforms:
     var_xforms = [
         (KernRe(r"__read_mostly"), ""),
         (KernRe(r"__ro_after_init"), ""),
+        (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""),
+        (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""),
         (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
         (KernRe(r"(?://.*)$"), ""),
         (KernRe(r"(?:/\*.*\*/)"), ""),
-- 
cgit v1.2.3


From 134468b0e2043efec4bd25dc6bcef238358a8111 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:57 +0100
Subject: docs: kdoc_re: handle strings and escape chars on NestedMatch

The logic inside NestedMatch currently doesn't consider that
function arguments may contain char and string literals, which
may in turn contain delimiters.

Add logic to handle strings and the escape characters inside them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <ac25335bc2d09649e17d1c86c17d3f8f2e8ec27c.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 664c04c8cc9f..0a7f12616f9f 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -216,6 +216,8 @@ class NestedMatch:
         for match_re in regex.finditer(line):
             start = match_re.start()
             offset = match_re.end()
+            string_char = None
+            escape = False
 
             d = line[offset - 1]
             if d not in self.DELIMITER_PAIRS:
@@ -229,6 +231,22 @@ class NestedMatch:
 
                 d = line[pos]
 
+                if escape:
+                    escape = False
+                    continue
+
+                if string_char:
+                    if d == '\\':
+                        escape = True
+                    elif d == string_char:
+                        string_char = None
+
+                    continue
+
+                if d in ('"', "'"):
+                    string_char = d
+                    continue
+
                 if d in self.DELIMITER_PAIRS:
                     end = self.DELIMITER_PAIRS[d]
 
-- 
cgit v1.2.3


From 962bdc440df58008e0319d6cbe08c4ca1193c112 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:58 +0100
Subject: docs: kdoc_re: don't recompile NestedMatch regex every time

Store the delimiters and their regex-compiled version as module-level constants.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <0cf2b72d4785aa8b727188b56688ff442d1c65ce.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 0a7f12616f9f..00afa5bccd6d 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -99,6 +99,13 @@ class KernRe:
         self.last_match = self.regex.search(string)
         return self.last_match
 
+    def finditer(self,  string):
+        """
+        Alias to re.finditer.
+        """
+
+        return self.regex.finditer(string)
+
     def findall(self, string):
         """
         Alias to re.findall.
@@ -134,6 +141,16 @@ class KernRe:
 
         return self.last_match.groups()
 
+#: Nested delimited pairs (brackets and parenthesis)
+DELIMITER_PAIRS = {
+    '{': '}',
+    '(': ')',
+    '[': ']',
+}
+
+#: compiled delimiters
+RE_DELIM = KernRe(r'[\{\}\[\]\(\)]')
+
 
 class NestedMatch:
     """
@@ -183,14 +200,6 @@ class NestedMatch:
     #
     #   FOO(arg1, arg2, arg3)
 
-    DELIMITER_PAIRS = {
-        '{': '}',
-        '(': ')',
-        '[': ']',
-    }
-
-    RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')
-
     def _search(self, regex, line):
         """
         Finds paired blocks for a regex that ends with a delimiter.
@@ -220,13 +229,13 @@ class NestedMatch:
             escape = False
 
             d = line[offset - 1]
-            if d not in self.DELIMITER_PAIRS:
+            if d not in DELIMITER_PAIRS:
                 continue
 
-            end = self.DELIMITER_PAIRS[d]
+            end = DELIMITER_PAIRS[d]
             stack.append(end)
 
-            for match in self.RE_DELIM.finditer(line[offset:]):
+            for match in RE_DELIM.finditer(line[offset:]):
                 pos = match.start() + offset
 
                 d = line[pos]
@@ -247,8 +256,8 @@ class NestedMatch:
                     string_char = d
                     continue
 
-                if d in self.DELIMITER_PAIRS:
-                    end = self.DELIMITER_PAIRS[d]
+                if d in DELIMITER_PAIRS:
+                    end = DELIMITER_PAIRS[d]
 
                     stack.append(end)
                     continue
-- 
cgit v1.2.3


From 34503b5fd10d8c7f1b1f4fecb6aae826fcf79424 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:40:59 +0100
Subject: docs: kdoc_re: Change NestedMatch args replacement to \0

Future patches will allow parsing each argument instead of the
whole set. Prepare for it by changing the placeholder that stands
for all the arguments from \1 to \0.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <46e383118be9d9e432e3814fe819ebb12261d7b4.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 2 +-
 tools/lib/python/kdoc/kdoc_re.py     | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 503a18212747..0f90c16cb51a 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -85,7 +85,7 @@ struct_nested_prefixes = [
     (re.compile(r"__cond_acquires_shared\s*\("), ""),
     (re.compile(r"__acquires_shared\s*\("), ""),
     (re.compile(r"__releases_shared\s*\("), ""),
-    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
+    (re.compile(r'\bSTRUCT_GROUP\('), r'\0'),
 ]
 
 #
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 00afa5bccd6d..ea4f6f3d9e42 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -188,7 +188,7 @@ class NestedMatch:
     # except that the content inside the match group is delimiter-aligned.
     #
     # The content inside parentheses is converted into a single replace
-    # group (e.g. r`\1').
+    # group (e.g. r`\0').
     #
     # It would be nice to change such definition to support multiple
     # match groups, allowing a regex equivalent to:
@@ -291,7 +291,7 @@ class NestedMatch:
 
         if the sub argument contains::
 
-            r'\1'
+            r'\0'
 
         it will work just like re: it places there the matched paired data
         with the delimiter stripped.
@@ -310,9 +310,9 @@ class NestedMatch:
             # Value, ignoring start/end delimiters
             value = line[end:pos - 1]
 
-            # replaces \1 at the sub string, if \1 is used there
+            # replaces \0 at the sub string, if \0 is used there
             new_sub = sub
-            new_sub = new_sub.replace(r'\1', value)
+            new_sub = new_sub.replace(r'\0', value)
 
             out += new_sub
 
-- 
cgit v1.2.3


From fc44c0a0b2a72f2e9331063a311a548634ae18af Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:41:00 +0100
Subject: docs: kdoc_re: make NestedMatch use KernRe

Instead of using re.compile(), pass the regex when creating the
class and use KernRe to keep it cached.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <cdf900faf0ed8a08f8c6ac1db5a43342968c0739.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 25 ++++++++++++-------------
 tools/lib/python/kdoc/kdoc_re.py     | 24 +++++++++++++++++-------
 2 files changed, 29 insertions(+), 20 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 0f90c16cb51a..cd9857906a2b 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -75,17 +75,17 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
 # is allowed.
 #
 struct_nested_prefixes = [
-    (re.compile(r"__cond_acquires\s*\("), ""),
-    (re.compile(r"__cond_releases\s*\("), ""),
-    (re.compile(r"__acquires\s*\("), ""),
-    (re.compile(r"__releases\s*\("), ""),
-    (re.compile(r"__must_hold\s*\("), ""),
-    (re.compile(r"__must_not_hold\s*\("), ""),
-    (re.compile(r"__must_hold_shared\s*\("), ""),
-    (re.compile(r"__cond_acquires_shared\s*\("), ""),
-    (re.compile(r"__acquires_shared\s*\("), ""),
-    (re.compile(r"__releases_shared\s*\("), ""),
-    (re.compile(r'\bSTRUCT_GROUP\('), r'\0'),
+    (NestedMatch(r"__cond_acquires\s*\("), ""),
+    (NestedMatch(r"__cond_releases\s*\("), ""),
+    (NestedMatch(r"__acquires\s*\("), ""),
+    (NestedMatch(r"__releases\s*\("), ""),
+    (NestedMatch(r"__must_hold\s*\("), ""),
+    (NestedMatch(r"__must_not_hold\s*\("), ""),
+    (NestedMatch(r"__must_hold_shared\s*\("), ""),
+    (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
+    (NestedMatch(r"__acquires_shared\s*\("), ""),
+    (NestedMatch(r"__releases_shared\s*\("), ""),
+    (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
 ]
 
 #
@@ -761,9 +761,8 @@ class KernelDoc:
         members = trim_private_members(members)
         members = self.xforms.apply("struct", members)
 
-        nested = NestedMatch()
         for search, sub in struct_nested_prefixes:
-            members = nested.sub(search, sub, members)
+            members = search.sub(search, sub, members)
         #
         # Deal with embedded struct and union members, and drop enums entirely.
         #
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index ea4f6f3d9e42..085b89a4547c 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -200,7 +200,10 @@ class NestedMatch:
     #
     #   FOO(arg1, arg2, arg3)
 
-    def _search(self, regex, line):
+    def __init__(self, regex):
+        self.regex = KernRe(regex)
+
+    def _search(self, line):
         """
         Finds paired blocks for a regex that ends with a delimiter.
 
@@ -222,7 +225,7 @@ class NestedMatch:
 
         stack = []
 
-        for match_re in regex.finditer(line):
+        for match_re in self.regex.finditer(line):
             start = match_re.start()
             offset = match_re.end()
             string_char = None
@@ -270,7 +273,7 @@ class NestedMatch:
                         yield start, offset, pos + 1
                         break
 
-    def search(self, regex, line):
+    def search(self, line):
         """
         This is similar to re.search:
 
@@ -278,12 +281,12 @@ class NestedMatch:
         returning occurrences only if all delimiters are paired.
         """
 
-        for t in self._search(regex, line):
+        for t in self._search(line):
 
             yield line[t[0]:t[2]]
 
-    def sub(self, regex, sub, line, count=0):
-        r"""
+    def sub(self, sub, line, count=0):
+        """
         This is similar to re.sub:
 
         It matches a regex that it is followed by a delimiter,
@@ -304,7 +307,7 @@ class NestedMatch:
         cur_pos = 0
         n = 0
 
-        for start, end, pos in self._search(regex, line):
+        for start, end, pos in self._search(line):
             out += line[cur_pos:start]
 
             # Value, ignoring start/end delimiters
@@ -331,3 +334,10 @@ class NestedMatch:
         out += line[cur_pos:l]
 
         return out
+
+    def __repr__(self):
+        """
+        Returns a displayable version of the class init.
+        """
+
+        return f'NestedMatch("{self.regex.regex.pattern}")'
-- 
cgit v1.2.3


From 85c2a51357f720fabfb6fa8d2551d87a94e797cb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 2 Mar 2026 17:41:01 +0100
Subject: docs: kdoc_parser: move nested match transforms to xforms_lists.py

As NestedMatch now has a sub method and a declaration close to
what KernRe does, we can move the rules to xforms_lists and
simplify kdoc_parser a little bit.

No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <762ce2a58ff024c1b0b6f6a6e05020d1415b8308.1772469446.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  | 21 ---------------------
 tools/lib/python/kdoc/xforms_lists.py | 14 +++++++++++++-
 2 files changed, 13 insertions(+), 22 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index cd9857906a2b..edf70ba139a5 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -69,25 +69,6 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
                         r'(?:[-:].*)?$',		# description (not captured)
                         cache = False)
 
-#
-# Regexes here are guaranteed to have the end delimiter matching
-# the start delimiter. Yet, right now, only one replace group
-# is allowed.
-#
-struct_nested_prefixes = [
-    (NestedMatch(r"__cond_acquires\s*\("), ""),
-    (NestedMatch(r"__cond_releases\s*\("), ""),
-    (NestedMatch(r"__acquires\s*\("), ""),
-    (NestedMatch(r"__releases\s*\("), ""),
-    (NestedMatch(r"__must_hold\s*\("), ""),
-    (NestedMatch(r"__must_not_hold\s*\("), ""),
-    (NestedMatch(r"__must_hold_shared\s*\("), ""),
-    (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
-    (NestedMatch(r"__acquires_shared\s*\("), ""),
-    (NestedMatch(r"__releases_shared\s*\("), ""),
-    (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
-]
-
 #
 # Ancillary functions
 #
@@ -761,8 +742,6 @@ class KernelDoc:
         members = trim_private_members(members)
         members = self.xforms.apply("struct", members)
 
-        for search, sub in struct_nested_prefixes:
-            members = search.sub(search, sub, members)
         #
         # Deal with embedded struct and union members, and drop enums entirely.
         #
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index 1bda7c4634c3..c07cbe1e6349 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -4,7 +4,7 @@
 
 import re
 
-from kdoc.kdoc_re import KernRe
+from kdoc.kdoc_re import KernRe, NestedMatch
 
 struct_args_pattern = r'([^,)]+)'
 
@@ -94,6 +94,18 @@ class CTransforms:
         (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
         (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
         (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
+
+        (NestedMatch(r"__cond_acquires\s*\("), ""),
+        (NestedMatch(r"__cond_releases\s*\("), ""),
+        (NestedMatch(r"__acquires\s*\("), ""),
+        (NestedMatch(r"__releases\s*\("), ""),
+        (NestedMatch(r"__must_hold\s*\("), ""),
+        (NestedMatch(r"__must_not_hold\s*\("), ""),
+        (NestedMatch(r"__must_hold_shared\s*\("), ""),
+        (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
+        (NestedMatch(r"__acquires_shared\s*\("), ""),
+        (NestedMatch(r"__releases_shared\s*\("), ""),
+        (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
     ]
 
     #: Transforms for function prototypes.
-- 
cgit v1.2.3


From 861dcdb6ad6f339a5958764352e626e2af7df4c1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:25:14 +0100
Subject: docs: kdoc_files: allow the caller to use a different xforms class

While the main goal for kernel-doc is to be used inside the Linux
Kernel, other open source projects could benefit from it. That's
currently the case of QEMU, which has a fork, mainly for two
reasons:

  - they need an extra C function transform rule;
  - they handle the html output a little bit differently.

Add an extra optional argument to make it easier for the code to be
shared: with that, QEMU can just create a new derived class
that will contain its specific rulesets, and just copy the
remaining kernel-doc files as-is.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <6b274ddbdcd9d438c6848e00e410a2f65ef80ec2.1772810574.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 33618c6abec2..c35e033cf123 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -118,7 +118,7 @@ class KernelFiles():
         if fname in self.files:
             return
 
-        doc = KernelDoc(self.config, fname, CTransforms())
+        doc = KernelDoc(self.config, fname, self.xforms)
         export_table, entries = doc.parse_kdoc()
 
         self.export_table[fname] = export_table
@@ -154,7 +154,7 @@ class KernelFiles():
 
         self.error(f"Cannot find file {fname}")
 
-    def __init__(self, verbose=False, out_style=None,
+    def __init__(self, verbose=False, out_style=None, xforms=None,
                  werror=False, wreturn=False, wshort_desc=False,
                  wcontents_before_sections=False,
                  logger=None):
@@ -193,6 +193,11 @@ class KernelFiles():
         self.config.wshort_desc = wshort_desc
         self.config.wcontents_before_sections = wcontents_before_sections
 
+        if xforms:
+            self.xforms = xforms
+        else:
+            self.xforms = CTransforms()
+
         if not logger:
             self.config.log = logging.getLogger("kernel-doc")
         else:
-- 
cgit v1.2.3


From 0d3ab0e4bbfd688bfaef66b6365a71c70a0f0450 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:25:15 +0100
Subject: docs: kdoc_files: document KernelFiles() ABI

The KernelFiles class is the main entry point to run kernel-doc,
being used by both tools/docs/kernel-doc and
Documentation/sphinx/kerneldoc.py.

It is also used by QEMU, which also uses the kernel-doc
libraries from tools/lib/python/kdoc.

Properly describe its ABI contract.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <13eb44c341232564eaf2a9c9de4782369fef57e1.1772810574.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py | 44 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index c35e033cf123..8c2059623949 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -91,7 +91,49 @@ class KernelFiles():
     """
     Parse kernel-doc tags on multiple kernel source files.
 
-    There are two type of parsers defined here:
+    This is the main entry point to run kernel-doc. This class is initialized
+    using a series of optional arguments:
+
+    ``verbose``
+        If True, enables kernel-doc verbosity. Default: False.
+
+    ``out_style``
+        Class to be used to format output. If None (default),
+        only report errors.
+
+    ``xforms``
+        Transforms to be applied to C prototypes and data structs.
+        If not specified, defaults to CTransforms().
+
+    ``werror``
+        If True, treat warnings as errors, returning an error code on warnings.
+
+        Default: False.
+
+    ``wreturn``
+        If True, warns about the lack of a return markup on functions.
+
+        Default: False.
+    ``wshort_desc``
+        If True, warns if initial short description is missing.
+
+        Default: False.
+
+    ``wcontents_before_sections``
+        If True, warn if there are contents before sections (deprecated).
+        This option is kept just for backward-compatibility, but it does
+        nothing, neither here nor at the original Perl script.
+
+        Default: False.
+
+    ``logger``
+        Optional logger class instance.
+
+        If not specified, defaults to use: ``logging.getLogger("kernel-doc")``
+
+    Note:
+        There are two type of parsers defined here:
+
         - self.parse_file(): parses both kernel-doc markups and
           ``EXPORT_SYMBOL*`` macros;
         - self.process_export_file(): parses only ``EXPORT_SYMBOL*`` macros.
-- 
cgit v1.2.3


From c1873e77434db2c592cfd21dd7d2e34a5c18304f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:41 +0100
Subject: docs: kdoc_output: use a method to emit the .TH header

All man emit functions need to add a .TH header. Move the code
to a common function, as we'll be addressing some issues in
the common code.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <2e55fcfe8724fde08a78635a1a3f8b449a6adf82.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 4210b91dde5f..fb6b90c54c8a 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -607,7 +607,20 @@ class ManFormat(OutputFormat):
         "%m %d %Y",
     ]
 
-    def __init__(self, modulename):
+    def emit_th(self, name, modulename = None, manual=None):
+        """Emit a title header line."""
+        name = name.strip()
+
+        if not manual:
+            manual = self.manual
+
+        if not modulename:
+            modulename = self.modulename
+
+        self.data += f'.TH "{modulename}" {self.section} "{name}" '
+        self.data += f'"{self.date}" "{manual}" LINUX\n'
+
+    def __init__(self, modulename, section="9", manual="API Manual"):
         """
         Creates class variables.
 
@@ -616,7 +629,11 @@ class ManFormat(OutputFormat):
         """
 
         super().__init__()
+
         self.modulename = modulename
+        self.section = section
+        self.manual = manual
+
         self.symbols = []
 
         dt = None
@@ -632,7 +649,7 @@ class ManFormat(OutputFormat):
         if not dt:
             dt = datetime.now()
 
-        self.man_date = dt.strftime("%B %Y")
+        self.date = dt.strftime("%B %Y")
 
     def arg_name(self, args, name):
         """
@@ -724,7 +741,7 @@ class ManFormat(OutputFormat):
 
         out_name = self.arg_name(args, name)
 
-        self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+        self.emit_th(out_name)
 
         for section, text in args.sections.items():
             self.data += f'.SH "{section}"' + "\n"
@@ -734,7 +751,8 @@ class ManFormat(OutputFormat):
 
         out_name = self.arg_name(args, name)
 
-        self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
+        self.emit_th(out_name, modulename = name,
+                     manual="Kernel Hacker\'s Manual")
 
         self.data += ".SH NAME\n"
         self.data += f"{name} \\- {args['purpose']}\n"
@@ -780,7 +798,7 @@ class ManFormat(OutputFormat):
     def out_enum(self, fname, name, args):
         out_name = self.arg_name(args, name)
 
-        self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+        self.emit_th(out_name)
 
         self.data += ".SH NAME\n"
         self.data += f"enum {name} \\- {args['purpose']}\n"
@@ -813,7 +831,7 @@ class ManFormat(OutputFormat):
         out_name = self.arg_name(args, name)
         full_proto = args.other_stuff["full_proto"]
 
-        self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+        self.emit_th(out_name)
 
         self.data += ".SH NAME\n"
         self.data += f"{name} \\- {args['purpose']}\n"
@@ -834,7 +852,7 @@ class ManFormat(OutputFormat):
         purpose = args.get('purpose')
         out_name = self.arg_name(args, name)
 
-        self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+        self.emit_th(out_name)
 
         self.data += ".SH NAME\n"
         self.data += f"typedef {name} \\- {purpose}\n"
@@ -849,7 +867,7 @@ class ManFormat(OutputFormat):
         definition = args.get('definition')
         out_name = self.arg_name(args, name)
 
-        self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+        self.emit_th(out_name)
 
         self.data += ".SH NAME\n"
         self.data += f"{args.type} {name} \\- {purpose}\n"
-- 
cgit v1.2.3


From 43874045faa72b876da361fed4b3c9aeee09ebdb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:42 +0100
Subject: docs: kdoc_output: remove extra attribute on man .TH headers

According to modern documentation, groff .TH supports up to 5
arguments, but the logic passes 6. Drop the last one
("LINUX").

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <f5e480af877903b0596b6a56ef7a152eb8a10dbf.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index fb6b90c54c8a..d0b237c09391 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -618,7 +618,7 @@ class ManFormat(OutputFormat):
             modulename = self.modulename
 
         self.data += f'.TH "{modulename}" {self.section} "{name}" '
-        self.data += f'"{self.date}" "{manual}" LINUX\n'
+        self.data += f'"{self.date}" "{manual}"\n'
 
     def __init__(self, modulename, section="9", manual="API Manual"):
         """
-- 
cgit v1.2.3


From 31938f120fa261b983fed3315239fe1c5fc4e6e7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:43 +0100
Subject: docs: kdoc_output: use a single manual for everything

There's no reason why functions will be on a different manual.
Unify its name, calling it as "Kernel API Manual".

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <000e1174a551e97ad4710ad4f3750b22017bedd5.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index d0b237c09391..24ee1fad681e 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -607,20 +607,17 @@ class ManFormat(OutputFormat):
         "%m %d %Y",
     ]
 
-    def emit_th(self, name, modulename = None, manual=None):
+    def emit_th(self, name, modulename = None):
         """Emit a title header line."""
         name = name.strip()
 
-        if not manual:
-            manual = self.manual
-
         if not modulename:
             modulename = self.modulename
 
         self.data += f'.TH "{modulename}" {self.section} "{name}" '
-        self.data += f'"{self.date}" "{manual}"\n'
+        self.data += f'"{self.date}" "{self.manual}"\n'
 
-    def __init__(self, modulename, section="9", manual="API Manual"):
+    def __init__(self, modulename, section="9", manual="Kernel API Manual"):
         """
         Creates class variables.
 
@@ -751,8 +748,7 @@ class ManFormat(OutputFormat):
 
         out_name = self.arg_name(args, name)
 
-        self.emit_th(out_name, modulename = name,
-                     manual="Kernel Hacker\'s Manual")
+        self.emit_th(out_name, modulename = name)
 
         self.data += ".SH NAME\n"
         self.data += f"{name} \\- {args['purpose']}\n"
-- 
cgit v1.2.3


From 1a63342a2774c734b73841fdfa41cf4d8d58cd94 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:44 +0100
Subject: docs: kdoc_output: don't use a different modulename for functions

It doesn't make much sense to have a different modulename just
for functions, but not for structs/enums/...

Use the same header everywhere.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <978259bdf3e8d310c646ecf76ce56d054f6d5738.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 24ee1fad681e..62e300e65405 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -607,14 +607,11 @@ class ManFormat(OutputFormat):
         "%m %d %Y",
     ]
 
-    def emit_th(self, name, modulename = None):
+    def emit_th(self, name):
         """Emit a title header line."""
         name = name.strip()
 
-        if not modulename:
-            modulename = self.modulename
-
-        self.data += f'.TH "{modulename}" {self.section} "{name}" '
+        self.data += f'.TH "{self.modulename}" {self.section} "{name}" '
         self.data += f'"{self.date}" "{self.manual}"\n'
 
     def __init__(self, modulename, section="9", manual="Kernel API Manual"):
@@ -748,7 +745,7 @@ class ManFormat(OutputFormat):
 
         out_name = self.arg_name(args, name)
 
-        self.emit_th(out_name, modulename = name)
+        self.emit_th(out_name)
 
         self.data += ".SH NAME\n"
         self.data += f"{name} \\- {args['purpose']}\n"
-- 
cgit v1.2.3


From 4160533d058cfa667159e8d6a46fe42c738a4a84 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:45 +0100
Subject: docs: kdoc_output: fix naming for DOC markups

Right now, DOC markups aren't being handled properly, as it was
using the same name for all output.

Fix it by filling the title argument in a different way.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <11d809e5c4bec23240d3ace3f342dbb2a9263446.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 38 ++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 15 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 62e300e65405..cf834dbf2725 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -607,14 +607,21 @@ class ManFormat(OutputFormat):
         "%m %d %Y",
     ]
 
-    def emit_th(self, name):
+    def modulename(self, args):
+        if self._modulename:
+            return self._modulename
+
+        return os.path.dirname(args.fname)
+
+    def emit_th(self, name, args):
         """Emit a title header line."""
-        name = name.strip()
+        title = name.strip()
+        module = self.modulename(args)
 
-        self.data += f'.TH "{self.modulename}" {self.section} "{name}" '
-        self.data += f'"{self.date}" "{self.manual}"\n'
+        self.data += f'.TH "{name}" {self.section} "{self.date}" '
+        self.data += f'"{self.modulename}" "{self.manual}"\n'
 
-    def __init__(self, modulename, section="9", manual="Kernel API Manual"):
+    def __init__(self, modulename=None, section="9", manual="Kernel API Manual"):
         """
         Creates class variables.
 
@@ -624,7 +631,7 @@ class ManFormat(OutputFormat):
 
         super().__init__()
 
-        self.modulename = modulename
+        self._modulename = modulename
         self.section = section
         self.manual = manual
 
@@ -658,7 +665,8 @@ class ManFormat(OutputFormat):
         dtype = args.type
 
         if dtype == "doc":
-            return self.modulename
+            return name
+#            return os.path.basename(self.modulename(args))
 
         if dtype in ["function", "typedef"]:
             return name
@@ -735,7 +743,7 @@ class ManFormat(OutputFormat):
 
         out_name = self.arg_name(args, name)
 
-        self.emit_th(out_name)
+        self.emit_th(out_name, args)
 
         for section, text in args.sections.items():
             self.data += f'.SH "{section}"' + "\n"
@@ -745,7 +753,7 @@ class ManFormat(OutputFormat):
 
         out_name = self.arg_name(args, name)
 
-        self.emit_th(out_name)
+        self.emit_th(out_name, args)
 
         self.data += ".SH NAME\n"
         self.data += f"{name} \\- {args['purpose']}\n"
@@ -791,7 +799,7 @@ class ManFormat(OutputFormat):
     def out_enum(self, fname, name, args):
         out_name = self.arg_name(args, name)
 
-        self.emit_th(out_name)
+        self.emit_th(out_name, args)
 
         self.data += ".SH NAME\n"
         self.data += f"enum {name} \\- {args['purpose']}\n"
@@ -824,7 +832,7 @@ class ManFormat(OutputFormat):
         out_name = self.arg_name(args, name)
         full_proto = args.other_stuff["full_proto"]
 
-        self.emit_th(out_name)
+        self.emit_th(out_name, args)
 
         self.data += ".SH NAME\n"
         self.data += f"{name} \\- {args['purpose']}\n"
@@ -841,11 +849,11 @@ class ManFormat(OutputFormat):
             self.output_highlight(text)
 
     def out_typedef(self, fname, name, args):
-        module = self.modulename
+        module = self.modulename(args)
         purpose = args.get('purpose')
         out_name = self.arg_name(args, name)
 
-        self.emit_th(out_name)
+        self.emit_th(out_name, args)
 
         self.data += ".SH NAME\n"
         self.data += f"typedef {name} \\- {purpose}\n"
@@ -855,12 +863,12 @@ class ManFormat(OutputFormat):
             self.output_highlight(text)
 
     def out_struct(self, fname, name, args):
-        module = self.modulename
+        module = self.modulename(args)
         purpose = args.get('purpose')
         definition = args.get('definition')
         out_name = self.arg_name(args, name)
 
-        self.emit_th(out_name)
+        self.emit_th(out_name, args)
 
         self.data += ".SH NAME\n"
         self.data += f"{args.type} {name} \\- {purpose}\n"
-- 
cgit v1.2.3


From 26b4fdefc0f96b1e2e25e0482de1476d037ad325 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:46 +0100
Subject: docs: kdoc_output: describe the class init parameters

As this class is part of the ABI used by both Sphinx kerneldoc
extension and docs/tools/kernel-doc, better describe what
parameters are used to initialize the ManFormat class.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <7c57f26150aae11fced259f30898a980b96efb68.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index cf834dbf2725..7a181b40810d 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -580,7 +580,34 @@ class RestFormat(OutputFormat):
 
 
 class ManFormat(OutputFormat):
-    """Consts and functions used by man pages output."""
+    """
+    Consts and functions used by man pages output.
+
+    This class has one mandatory parameter and some optional ones, which
+    are needed to define the title header contents:
+
+    ``modulename``
+        Defines the module name to be used at the troff ``.TH`` output.
+
+        This argument is mandatory.
+
+    ``section``
+        Usually a numeric value from 0 to 9, but man pages also accept
+        some strings like "p".
+
+        Defaults to ``9``
+
+    ``manual``
+        Defaults to ``Kernel API Manual``.
+
+    The above controls the output of the corresponding fields on troff
+    title headers, which will be filled like this::
+
+        .TH "{name}" {section} "{date}" "{modulename}" "{manual}"
+
+    where ``name`` will match the API symbol name, and ``date`` will be
+    either the date where the Kernel was compiled or the current date
+    """
 
     highlights = (
         (type_constant, r"\1"),
-- 
cgit v1.2.3


From e4dadcf510da846f32aaaad5d5988890cbf6033d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:47 +0100
Subject: docs: kdoc_output: pick a better default for modulename

Instead of placing the same data for modulename for all generated
man pages, use the directory from the filename used to produce
kernel docs as basis.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <8a5d91c93c0b9b34c2f60e389f4464742804d0d6.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 7a181b40810d..c25f80a39cdc 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -589,7 +589,8 @@ class ManFormat(OutputFormat):
     ``modulename``
         Defines the module name to be used at the troff ``.TH`` output.
 
-        This argument is mandatory.
+        This argument is optional. If not specified, it will be filled
+        with the directory which contains the documented file.
 
     ``section``
         Usually a numeric value from 0 to 9, but man pages also accept
@@ -645,8 +646,8 @@ class ManFormat(OutputFormat):
         title = name.strip()
         module = self.modulename(args)
 
-        self.data += f'.TH "{name}" {self.section} "{self.date}" '
-        self.data += f'"{self.modulename}" "{self.manual}"\n'
+        self.data += f'.TH "{title}" {self.section} "{self.date}" '
+        self.data += f'"{module}" "{self.manual}"\n'
 
     def __init__(self, modulename=None, section="9", manual="Kernel API Manual"):
         """
-- 
cgit v1.2.3


From cde7c96f88a0fe9ed53e8bb57147b19a725cf097 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:48 +0100
Subject: docs: kdoc_output: Change the logic to handle man highlight

The code inside ManFormat code to output man pages is too simple:
it produces very bad results when the content has tables or code
blocks.

Change the way lines are parsed there to allow adding extra
logic to handle some special cases.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <6ae2301a40b3fcb4381dd9dda8c75d14f9616b46.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index c25f80a39cdc..9caffe0d9753 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -755,15 +755,23 @@ class ManFormat(OutputFormat):
         if isinstance(contents, list):
             contents = "\n".join(contents)
 
-        for line in contents.strip("\n").split("\n"):
-            line = KernRe(r"^\s*").sub("", line)
-            if not line:
-                continue
+        lines = contents.strip("\n").split("\n")
+        i = 0
 
-            if line[0] == ".":
-                self.data += "\\&" + line + "\n"
-            else:
-                self.data += line + "\n"
+        while i < len(lines):
+            org_line = lines[i]
+
+            line = KernRe(r"^\s*").sub("", org_line)
+
+            if line:
+                if line[0] == ".":
+                    self.data += "\\&" + line + "\n"
+                    i += 1
+                    continue
+
+            i += 1
+
+            self.data += line + "\n"
 
     def out_doc(self, fname, name, args):
         if not self.check_doc(name, args):
-- 
cgit v1.2.3


From 4ec130cff633361c2217d2ba116ae32772087087 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:49 +0100
Subject: docs: kdoc_output: add a logic to handle tables inside kernel-doc
 markups

Especially when DOC is used, it is not uncommon to have tables
inside a kernel-doc markup.

Add support for simple tables and complex grid tables when output
in groff format.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <442ad76442c325044eb9f34a155d5f484341fb35.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 130 +++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 9caffe0d9753..7848514a4d22 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -744,6 +744,126 @@ class ManFormat(OutputFormat):
 
         return self.data
 
+    def emit_table(self, colspec_row, rows):
+
+        if not rows:
+            return ""
+
+        out = ""
+        colspec = "\t".join(["l"] * len(rows[0]))
+
+        out += "\n.TS\n"
+        out += "box;\n"
+        out += f"{colspec}.\n"
+
+        if colspec_row:
+            out_row = []
+
+            for text in colspec_row:
+                out_row.append(f"\\fB{text}\\fP")
+
+            out += "\t".join(out_row) + "\n_\n"
+
+        for r in rows:
+            out += "\t".join(r) + "\n"
+
+        out += ".TE\n"
+
+        return out
+
+    def grid_table(self, lines, start):
+        """
+        Ancillary function to help handling a grid table inside the text.
+        """
+
+        i = start + 1
+        rows = []
+        colspec_row = None
+
+        while i < len(lines):
+            line = lines[i]
+
+            if KernRe(r"^\s*\|.*\|\s*$").match(line):
+                parts = []
+
+                for p in line.strip('|').split('|'):
+                    parts.append(p.strip())
+
+                rows.append(parts)
+
+            elif KernRe(r'^\+\=[\+\=]+\+\s*$').match(line):
+                if rows and rows[0]:
+                    if not colspec_row:
+                        colspec_row = [""] * len(rows[0])
+
+                    for j in range(0, len(rows[0])):
+                        content = []
+                        for row in rows:
+                            content.append(row[j])
+
+                        colspec_row[j] = " ".join(content)
+
+                    rows = []
+
+            elif KernRe(r"^\s*\+[-+]+\+.*$").match(line):
+                pass
+
+            else:
+                break
+
+            i += 1
+
+        return i, self.emit_table(colspec_row, rows)
+
+    def simple_table(self, lines, start):
+        """
+        Ancillary function to help handling a simple table inside the text.
+        """
+
+        i = start
+        rows = []
+        colspec_row = None
+
+        pos = []
+        for m in KernRe(r'\-+').finditer(lines[i]):
+            pos.append((m.start(), m.end() - 1))
+
+        i += 1
+        while i < len(lines):
+            line = lines[i]
+
+            if KernRe(r"^\s*[\-]+[ \t\-]+$").match(line):
+                i += 1
+                break
+
+            elif KernRe(r'^[\s=]+$').match(line):
+                if rows and rows[0]:
+                    if not colspec_row:
+                        colspec_row = [""] * len(rows[0])
+
+                    for j in range(0, len(rows[0])):
+                        content = []
+                        for row in rows:
+                            content.append(row[j])
+
+                        colspec_row[j] = " ".join(content)
+
+                    rows = []
+
+            else:
+                row = [""] * len(pos)
+
+                for j in range(0, len(pos)):
+                    start, end = pos[j]
+
+                    row[j] = line[start:end].strip()
+
+                rows.append(row)
+
+            i += 1
+
+        return i, self.emit_table(colspec_row, rows)
+
     def output_highlight(self, block):
         """
         Outputs a C symbol that may require being highlighted with
@@ -764,6 +884,16 @@ class ManFormat(OutputFormat):
             line = KernRe(r"^\s*").sub("", org_line)
 
             if line:
+                if KernRe(r"^\+\-[-+]+\+.*$").match(line):
+                    i, text = self.grid_table(lines, i)
+                    self.data += text
+                    continue
+
+                if KernRe(r"^\-+[ \t]\-[ \t\-]+$").match(line):
+                    i, text = self.simple_table(lines, i)
+                    self.data += text
+                    continue
+
                 if line[0] == ".":
                     self.data += "\\&" + line + "\n"
                     i += 1
-- 
cgit v1.2.3


From 908ae13b1864c05bcde8cfc7127ec147d28f9414 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:50 +0100
Subject: docs: kdoc_output: add support to handle code blocks

It is common to have code blocks inside kernel-doc markups.
By default, troff will group all lines altogether, producing a
very weird output. Fix it by disabling filling when a code block
is detected, re-enabling it afterwards.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <fb6f16ad345f7ec1b1ebe4c5ec7ea3d9cd6de4fb.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 64 ++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 7848514a4d22..df9af444da57 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -864,6 +864,65 @@ class ManFormat(OutputFormat):
 
         return i, self.emit_table(colspec_row, rows)
 
+    def code_block(self, lines, start):
+        """
+        Ensure that code blocks won't be messed up at the output.
+
+        By default, troff joins lines in the same paragraph. Disable it
+        on code blocks.
+        """
+
+        line = lines[start]
+
+        if "code-block" in line:
+            out = "\n.nf\n"
+        elif line.startswith("..") and line.endswith("::"):
+            #
+            # Handle note, warning, error, ... markups
+            #
+            line = line[2:-1].strip().upper()
+            out = f"\n.nf\n\\fB{line}\\fP\n"
+        elif line.endswith("::"):
+            out = line[:-1]
+            out += "\n.nf\n"
+        else:
+            # Just in case. Should never happen in practice
+            out = "\n.nf\n"
+
+        i = start + 1
+        ident = None
+
+        while i < len(lines):
+            line = lines[i]
+
+            m = KernRe(r"\S").match(line)
+            if not m:
+                out += line + "\n"
+                i += 1
+                continue
+
+            pos = m.start()
+            if not ident:
+                if pos > 0:
+                    ident = pos
+                else:
+                    out += "\n.fi\n"
+                    if i > start + 1:
+                        return i - 1, out
+                    else:
+                        # Just in case. Should never happen in practice
+                        return i, out
+
+            if pos >= ident:
+                out += line + "\n"
+                i += 1
+                continue
+
+            break
+
+        out += "\n.fi\n"
+        return i, out
+
     def output_highlight(self, block):
         """
         Outputs a C symbol that may require being highlighted with
@@ -894,6 +953,11 @@ class ManFormat(OutputFormat):
                     self.data += text
                     continue
 
+                if line.endswith("::") or KernRe(r"\.\.\s+code-block.*::").match(line):
+                    i, text = self.code_block(lines, i)
+                    self.data += text
+                    continue
+
                 if line[0] == ".":
                     self.data += "\\&" + line + "\n"
                     i += 1
-- 
cgit v1.2.3


From ab9150972f21c41d4487e5d4b21cea0ecfe0bb94 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 6 Mar 2026 16:45:51 +0100
Subject: docs: kdoc_output: better handle lists

On several functions, the return values are inside a bullet
list. Also, on some places, there are numbered lists as well.

Use a troff markup to format them, to avoid placing everything
on a single line.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <edea87623550a51086c23c9af0edc5e9fcce0ed6.1772810752.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index df9af444da57..08539dd92cbb 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -963,6 +963,14 @@ class ManFormat(OutputFormat):
                     i += 1
                     continue
 
+                #
+                # Handle lists
+                #
+                line = KernRe(r'^[-*]\s+').sub(r'.IP \[bu]\n', line)
+                line = KernRe(r'^(\d+|a-z)[\.\)]\s+').sub(r'.IP \1\n', line)
+            else:
+                line = ".PP\n"
+
             i += 1
 
             self.data += line + "\n"
-- 
cgit v1.2.3


From 9b4e099c221cd118b9dbe720586c1f1c71666d09 Mon Sep 17 00:00:00 2001
From: Ricardo Ungerer <ungerer.ricardo@gmail.com>
Date: Mon, 16 Mar 2026 22:02:17 +0000
Subject: jobserver: Fix typo in docstring

This commit fixes small typos in the docstring of jobserver.py.

Signed-off-by: Ricardo Ungerer <ungerer.ricardo@gmail.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <20260316220218.568022-1-ungerer.ricardo@gmail.com>
---
 tools/lib/python/jobserver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py
index aba22c33393d..0b1ffdf9f7a3 100755
--- a/tools/lib/python/jobserver.py
+++ b/tools/lib/python/jobserver.py
@@ -8,14 +8,14 @@
 """
 Interacts with the POSIX jobserver during the Kernel build time.
 
-A "normal" jobserver task, like the one initiated by a make subrocess would do:
+A "normal" jobserver task, like the one initiated by a make subprocess would do:
 
     - open read/write file descriptors to communicate with the job server;
     - ask for one slot by calling::
 
         claim = os.read(reader, 1)
 
-    - when the job finshes, call::
+    - when the job finishes, call::
 
         os.write(writer, b"+")  # os.write(writer, claim)
 
-- 
cgit v1.2.3


From 023aabb6ccb298add344cab7c00c5f27b10319aa Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:21 +0100
Subject: docs: python: add helpers to run unit tests

While python internal libraries have support for unit tests, its
output is not nice. Add a helper module to improve its output.

I wrote this module last year while testing some scripts I used
internally. The initial skeleton was generated with the help of
LLM tools, but it was highly modified to ensure that it will work
as I would expect.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <37999041f616ddef41e84cf2686c0264d1a51dc9.1773074166.git.mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <d81be167b8cdeb003c1f8dcc7ad83a5ed2b520b6.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/unittest_helper.py | 353 ++++++++++++++++++++++++++++++++++++
 1 file changed, 353 insertions(+)
 create mode 100755 tools/lib/python/unittest_helper.py

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittest_helper.py
new file mode 100755
index 000000000000..55d444cd73d4
--- /dev/null
+++ b/tools/lib/python/unittest_helper.py
@@ -0,0 +1,353 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025-2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0103,R0912,R0914,E1101
+
+"""
+Provides helper functions and classes to execute python unit tests.
+
+Those helper functions provide a nice colored output summary of each
+executed test and, when a test fails, they show the difference in diff
+format when running in verbose mode, like::
+
+    $ tools/unittests/nested_match.py -v
+    ...
+    Traceback (most recent call last):
+    File "/new_devel/docs/tools/unittests/nested_match.py", line 69, in test_count_limit
+        self.assertEqual(replaced, "bar(a); bar(b); foo(c)")
+        ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    AssertionError: 'bar(a) foo(b); foo(c)' != 'bar(a); bar(b); foo(c)'
+    - bar(a) foo(b); foo(c)
+    ?       ^^^^
+    + bar(a); bar(b); foo(c)
+    ?       ^^^^^
+    ...
+
+It also allows filtering what tests will be executed via ``-k`` parameter.
+
+Typical usage is to do::
+
+    from unittest_helper import run_unittest
+    ...
+
+    if __name__ == "__main__":
+        run_unittest(__file__)
+
+If passing arguments is needed, on a more complex scenario, it can be
+used like on this example::
+
+    from unittest_helper import TestUnits, run_unittest
+    ...
+    env = {'sudo': ""}
+    ...
+    if __name__ == "__main__":
+        runner = TestUnits()
+        base_parser = runner.parse_args()
+        base_parser.add_argument('--sudo', action='store_true',
+                                help='Enable tests requiring sudo privileges')
+
+        args = base_parser.parse_args()
+
+        # Update module-level flag
+        if args.sudo:
+            env['sudo'] = "1"
+
+        # Run tests with customized arguments
+        runner.run(__file__, parser=base_parser, args=args, env=env)
+"""
+
+import argparse
+import atexit
+import os
+import re
+import unittest
+import sys
+
+from unittest.mock import patch
+
+
+class Summary(unittest.TestResult):
+    """
+    Overrides ``unittest.TestResult`` class to provide a nice colored
+    summary. When in verbose mode, displays actual/expected difference in
+    unified diff format.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        #: Dictionary to store organized test results.
+        self.test_results = {}
+
+        #: max length of the test names.
+        self.max_name_length = 0
+
+    def startTest(self, test):
+        super().startTest(test)
+        test_id = test.id()
+        parts = test_id.split(".")
+
+        # Extract module, class, and method names
+        if len(parts) >= 3:
+            module_name = parts[-3]
+        else:
+            module_name = ""
+        if len(parts) >= 2:
+            class_name = parts[-2]
+        else:
+            class_name = ""
+
+        method_name = parts[-1]
+
+        # Build the hierarchical structure
+        if module_name not in self.test_results:
+            self.test_results[module_name] = {}
+
+        if class_name not in self.test_results[module_name]:
+            self.test_results[module_name][class_name] = []
+
+        # Track maximum test name length for alignment
+        display_name = f"{method_name}:"
+
+        self.max_name_length = max(len(display_name), self.max_name_length)
+
+    def _record_test(self, test, status):
+        test_id = test.id()
+        parts = test_id.split(".")
+        if len(parts) >= 3:
+            module_name = parts[-3]
+        else:
+            module_name = ""
+        if len(parts) >= 2:
+            class_name = parts[-2]
+        else:
+            class_name = ""
+        method_name = parts[-1]
+        self.test_results[module_name][class_name].append((method_name, status))
+
+    def addSuccess(self, test):
+        super().addSuccess(test)
+        self._record_test(test, "OK")
+
+    def addFailure(self, test, err):
+        super().addFailure(test, err)
+        self._record_test(test, "FAIL")
+
+    def addError(self, test, err):
+        super().addError(test, err)
+        self._record_test(test, "ERROR")
+
+    def addSkip(self, test, reason):
+        super().addSkip(test, reason)
+        self._record_test(test, f"SKIP ({reason})")
+
+    def printResults(self):
+        """
+        Print results using colors if tty.
+        """
+        # Check for ANSI color support
+        use_color = sys.stdout.isatty()
+        COLORS = {
+            "OK":            "\033[32m",   # Green
+            "FAIL":          "\033[31m",   # Red
+            "SKIP":          "\033[1;33m", # Yellow
+            "PARTIAL":       "\033[33m",   # Orange
+            "EXPECTED_FAIL": "\033[36m",   # Cyan
+            "reset":         "\033[0m",    # Reset to default terminal color
+        }
+        if not use_color:
+            for c in COLORS:
+                COLORS[c] = ""
+
+        # Calculate maximum test name length
+        if not self.test_results:
+            return
+        try:
+            lengths = []
+            for module in self.test_results.values():
+                for tests in module.values():
+                    for test_name, _ in tests:
+                        lengths.append(len(test_name) + 1)  # +1 for colon
+            max_length = max(lengths) + 2  # Additional padding
+        except ValueError:
+            sys.exit("Test list is empty")
+
+        # Print results
+        for module_name, classes in self.test_results.items():
+            print(f"{module_name}:")
+            for class_name, tests in classes.items():
+                print(f"    {class_name}:")
+                for test_name, status in tests:
+                    # Get base status without reason for SKIP
+                    if status.startswith("SKIP"):
+                        status_code = status.split()[0]
+                    else:
+                        status_code = status
+                    color = COLORS.get(status_code, "")
+                    print(
+                        f"        {test_name + ':':<{max_length}}{color}{status}{COLORS['reset']}"
+                    )
+            print()
+
+        # Print summary
+        print(f"\nRan {self.testsRun} tests", end="")
+        if hasattr(self, "timeTaken"):
+            print(f" in {self.timeTaken:.3f}s", end="")
+        print()
+
+        if not self.wasSuccessful():
+            print(f"\n{COLORS['FAIL']}FAILED (", end="")
+            failures = getattr(self, "failures", [])
+            errors = getattr(self, "errors", [])
+            if failures:
+                print(f"failures={len(failures)}", end="")
+            if errors:
+                if failures:
+                    print(", ", end="")
+                print(f"errors={len(errors)}", end="")
+            print(f"){COLORS['reset']}")
+
+
+def flatten_suite(suite):
+    """Flatten test suite hierarchy."""
+    tests = []
+    for item in suite:
+        if isinstance(item, unittest.TestSuite):
+            tests.extend(flatten_suite(item))
+        else:
+            tests.append(item)
+    return tests
+
+
+class TestUnits:
+    """
+    Helper class to set verbosity level.
+
+    This class discovers test files, imports their unittest classes and
+    executes the tests on them.
+    """
+    def parse_args(self):
+        """Returns a parser for command line arguments."""
+        parser = argparse.ArgumentParser(description="Test runner with regex filtering")
+        parser.add_argument("-v", "--verbose", action="count", default=1)
+        parser.add_argument("-f", "--failfast", action="store_true")
+        parser.add_argument("-k", "--keyword",
+                            help="Regex pattern to filter test methods")
+        return parser
+
+    def run(self, caller_file=None, pattern=None,
+            suite=None, parser=None, args=None, env=None):
+        """
+        Execute all tests from the unit test file.
+
+        It contains several optional parameters:
+
+        ``caller_file``:
+            -  name of the file that contains test.
+
+               typical usage is to place __file__ at the caller test, e.g.::
+
+                    if __name__ == "__main__":
+                        TestUnits().run(__file__)
+
+        ``pattern``:
+            - optional pattern to match multiple file names. Defaults
+              to basename of ``caller_file``.
+
+        ``suite``:
+            - an unittest suite initialized by the caller using
+              ``unittest.TestLoader().discover()``.
+
+        ``parser``:
+            - an argparse parser. If not defined, this helper will create
+              one.
+
+        ``args``:
+            - an ``argparse.Namespace`` data filled by the caller.
+
+        ``env``:
+            - environment variables that will be passed to the test suite
+
+        At least ``caller_file`` or ``suite`` must be used, otherwise a
+        ``TypeError`` will be raised.
+        """
+        if not args:
+            if not parser:
+                parser = self.parse_args()
+            args = parser.parse_args()
+
+        if not caller_file and not suite:
+            raise TypeError("Either caller_file or suite is needed at TestUnits")
+
+        verbose = args.verbose
+
+        if not env:
+            env = os.environ.copy()
+
+        env["VERBOSE"] = f"{verbose}"
+
+        patcher = patch.dict(os.environ, env)
+        patcher.start()
+        # ensure it gets stopped after
+        atexit.register(patcher.stop)
+
+
+        if verbose >= 2:
+            unittest.TextTestRunner(verbosity=verbose).run = lambda suite: suite
+
+        # Load ONLY tests from the calling file
+        if not suite:
+            if not pattern:
+                pattern = caller_file
+
+            loader = unittest.TestLoader()
+            suite = loader.discover(start_dir=os.path.dirname(caller_file),
+                                    pattern=os.path.basename(caller_file))
+
+        # Flatten the suite for environment injection
+        tests_to_inject = flatten_suite(suite)
+
+        # Filter tests by method name if -k specified
+        if args.keyword:
+            try:
+                pattern = re.compile(args.keyword)
+                filtered_suite = unittest.TestSuite()
+                for test in tests_to_inject:  # Use the pre-flattened list
+                    method_name = test.id().split(".")[-1]
+                    if pattern.search(method_name):
+                        filtered_suite.addTest(test)
+                suite = filtered_suite
+            except re.error as e:
+                sys.stderr.write(f"Invalid regex pattern: {e}\n")
+                sys.exit(1)
+        else:
+            # Maintain original suite structure if no keyword filtering
+            suite = unittest.TestSuite(tests_to_inject)
+
+        if verbose >= 2:
+            resultclass = None
+        else:
+            resultclass = Summary
+
+        runner = unittest.TextTestRunner(verbosity=args.verbose,
+                                            resultclass=resultclass,
+                                            failfast=args.failfast)
+        result = runner.run(suite)
+        if resultclass:
+            result.printResults()
+
+        sys.exit(not result.wasSuccessful())
+
+
+def run_unittest(fname):
+    """
+    Basic usage of TestUnits class.
+
+    Use it when there's no need to pass any extra argument to the
+    tests. The recommended way is to place this at the end of each
+    unittest module::
+
+        if __name__ == "__main__":
+            run_unittest(__file__)
+    """
+    TestUnits().run(fname)
-- 
cgit v1.2.3


From b1e64e30fce86e61d3b09f9352b262622f3f0cda Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:23 +0100
Subject: docs: kdoc: don't add broken comments inside prototypes

Parsing a file like drivers/scsi/isci/host.h, which contains
broken kernel-doc markups makes it create a prototype that contains
unmatched end comments.

That causes, for instance, struct sci_power_control to be shown with
this prototype:

    struct sci_power_control {
        * it is not. */ bool timer_started;
        */ struct sci_timer timer;
        * requesters field. */ u8 phys_waiting;
        */ u8 phys_granted_power;
        * mapped into requesters via struct sci_phy.phy_index */ struct isci_phy *requesters[SCI_MAX_PHYS];
    };

as comments won't start with "/*" anymore.

Fix the logic to detect such cases, and keep adding the comments
inside it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <18e577dbbd538dcc22945ff139fe3638344e14f0.1773074166.git.mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <12ac4a97e2bd5a19d6537122c10098690c38d2c7.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index edf70ba139a5..086579d00b5c 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -1355,6 +1355,12 @@ class KernelDoc:
         elif doc_content.search(line):
             self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
             self.state = state.PROTO
+
+            #
+            # Don't let it add partial comments at the code, as it breaks the
+            # logic meant to remove comments from prototypes.
+            #
+            self.process_proto_type(ln, "/**\n" + line)
         # else ... ??
 
     def process_inline_text(self, ln, line):
-- 
cgit v1.2.3


From d5265f7af2d284d5421b763f268157b5fa72f806 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:24 +0100
Subject: docs: kdoc: properly handle empty enum arguments

Depending on how the enum proto is written, a comma at the end
may incorrectly make kernel-doc parse an arg like " ".

Strip spaces before checking if arg is empty.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <4182bfb7e5f5b4bbaf05cee1bede691e56247eaf.1773074166.git.mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <640784283d52c5fc52ea597344ecd567e2fb6e22.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 086579d00b5c..4b3c555e6c8e 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -810,9 +810,10 @@ class KernelDoc:
         member_set = set()
         members = KernRe(r'\([^;)]*\)').sub('', members)
         for arg in members.split(','):
-            if not arg:
-                continue
             arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+            if not arg.strip():
+                continue
+
             self.entry.parameterlist.append(arg)
             if arg not in self.entry.parameterdescs:
                 self.entry.parameterdescs[arg] = self.undescribed
-- 
cgit v1.2.3


From df50e848f67523195ee0b4c6d2c01823e36a15e7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:25 +0100
Subject: docs: add a C tokenizer to be used by kernel-doc

Handling C code purely using regular expressions doesn't work well.

Add a C tokenizer to help doing it the right way.

The tokenizer was written using as basis the Python re documentation
tokenizer example from:
    https://docs.python.org/3/library/re.html#writing-a-tokenizer

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <39787bb8022e10c65df40c746077f7f66d07ffed.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py | 292 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 292 insertions(+)
 create mode 100644 tools/lib/python/kdoc/c_lex.py

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
new file mode 100644
index 000000000000..9d726f821f3f
--- /dev/null
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -0,0 +1,292 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+C tokenizer ancillary classes.
+
+Those help splitting C code into tokens for kernel-doc.
+
+Please notice that the code here may raise exceptions to indicate bad
+usage inside kdoc, such as problems at the replace pattern.
+
+Other errors are logged via log instance.
+"""
+
+import logging
+import re
+
+from .kdoc_re import KernRe
+
+log = logging.getLogger(__name__)
+
+
+class CToken():
+    """
+    Data class to define a C token.
+    """
+
+    # Tokens that can be used by the parser. Works like a C enum.
+
+    COMMENT = 0     #: A standard C or C99 comment, including delimiter.
+    STRING = 1      #: A string, including quotation marks.
+    CHAR = 2        #: A character, including apostrophes.
+    NUMBER = 3      #: A number.
+    PUNC = 4        #: A punctuation mark: ``,`` / ``.``.
+    BEGIN = 5       #: A begin character: ``{`` / ``[`` / ``(``.
+    END = 6         #: An end character: ``}`` / ``]`` / ``)``.
+    CPP = 7         #: A preprocessor macro.
+    HASH = 8        #: The hash character - useful to handle other macros.
+    OP = 9          #: A C operator (add, subtract, ...).
+    STRUCT = 10     #: A ``struct`` keyword.
+    UNION = 11      #: A ``union`` keyword.
+    ENUM = 12       #: An ``enum`` keyword.
+    TYPEDEF = 13    #: A ``typedef`` keyword.
+    NAME = 14       #: A name. Can be an ID or a type.
+    SPACE = 15      #: Any space characters, including new lines
+    ENDSTMT = 16    #: End of a statement (``;``).
+
+    BACKREF = 17    #: Not a valid C sequence, but used at sub regex patterns.
+
+    MISMATCH = 255  #: an error indicator: should never happen in practice.
+
+    # Dict to convert from an enum integer into a string.
+    _name_by_val = {v: k for k, v in dict(vars()).items() if isinstance(v, int)}
+
+    # Dict to convert from string to an enum-like integer value.
+    _name_to_val = {k: v for v, k in _name_by_val.items()}
+
+    @staticmethod
+    def to_name(val):
+        """Convert from an integer value from CToken enum into a string"""
+
+        return CToken._name_by_val.get(val, f"UNKNOWN({val})")
+
+    @staticmethod
+    def from_name(name):
+        """Convert a string into a CToken enum value"""
+        if name in CToken._name_to_val:
+            return CToken._name_to_val[name]
+
+        return CToken.MISMATCH
+
+
+    def __init__(self, kind, value=None, pos=0,
+                 brace_level=0, paren_level=0, bracket_level=0):
+        self.kind = kind
+        self.value = value
+        self.pos = pos
+        self.level = (bracket_level, paren_level, brace_level)
+
+    def __repr__(self):
+        name = self.to_name(self.kind)
+        if isinstance(self.value, str):
+            value = '"' + self.value + '"'
+        else:
+            value = self.value
+
+        return f"CToken(CToken.{name}, {value}, {self.pos}, {self.level})"
+
+#: Regexes to parse C code, transforming it into tokens.
+RE_SCANNER_LIST = [
+    #
+    # Note that \s\S is different than .*, as it also catches \n
+    #
+    (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"),
+
+    (CToken.STRING,  r'"(?:\\.|[^"\\])*"'),
+    (CToken.CHAR,    r"'(?:\\.|[^'\\])'"),
+
+    (CToken.NUMBER,  r"0[xX][\da-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|"
+                     r"\d+(?:\.\d*)?(?:[eE][+-]?\d+)?[fFlL]*"),
+
+    (CToken.ENDSTMT, r"(?:\s+;|;)"),
+
+    (CToken.PUNC,    r"[,\.]"),
+
+    (CToken.BEGIN,   r"[\[\(\{]"),
+
+    (CToken.END,     r"[\]\)\}]"),
+
+    (CToken.CPP,     r"#\s*(?:define|include|ifdef|ifndef|if|else|elif|endif|undef|pragma)\b"),
+
+    (CToken.HASH,    r"#"),
+
+    (CToken.OP,      r"\+\+|\-\-|\->|==|\!=|<=|>=|&&|\|\||<<|>>|\+=|\-=|\*=|/=|%="
+                     r"|&=|\|=|\^=|[=\+\-\*/%<>&\|\^~!\?\:]"),
+
+    (CToken.STRUCT,  r"\bstruct\b"),
+    (CToken.UNION,   r"\bunion\b"),
+    (CToken.ENUM,    r"\benum\b"),
+    (CToken.TYPEDEF, r"\btypedef\b"),
+
+    (CToken.NAME,    r"[A-Za-z_]\w*"),
+
+    (CToken.SPACE,   r"\s+"),
+
+    (CToken.BACKREF, r"\\\d+"),
+
+    (CToken.MISMATCH,r"."),
+]
+
+def fill_re_scanner(token_list):
+    """Ancillary routine to convert RE_SCANNER_LIST into a finditer regex"""
+    re_tokens = []
+
+    for kind, pattern in token_list:
+        name = CToken.to_name(kind)
+        re_tokens.append(f"(?P<{name}>{pattern})")
+
+    return KernRe("|".join(re_tokens), re.MULTILINE | re.DOTALL)
+
+#: Handle C continuation lines.
+RE_CONT = KernRe(r"\\\n")
+
+RE_COMMENT_START = KernRe(r'/\*\s*')
+
+#: tokenizer regex, built once from RE_SCANNER_LIST at module import time.
+RE_SCANNER = fill_re_scanner(RE_SCANNER_LIST)
+
+
+class CTokenizer():
+    """
+    Scan C statements and definitions and produce tokens.
+
+    When converted to string, it drops comments and handles public/private
+    values, respecting depth.
+    """
+
+    # This class is inspired and follows the basic concepts of:
+    #   https://docs.python.org/3/library/re.html#writing-a-tokenizer
+
+    def __init__(self, source=None, log=None):
+        """
+        Create a regular expression to handle RE_SCANNER_LIST.
+
+        While I generally don't like using regex group naming via:
+            (?P<name>...)
+
+        in this particular case, it makes sense, as we can pick the name
+        when matching a code via RE_SCANNER.
+        """
+
+        self.tokens = []
+
+        if not source:
+            return
+
+        if isinstance(source, list):
+            self.tokens = source
+            return
+
+        #
+        # While we could just use _tokenize directly via iterator,
+        # as we'll need to use the tokenizer several times inside kernel-doc
+        # to handle macro transforms, cache the results on a list, as
+        # re-using it is cheaper than having to parse every time.
+        #
+        for tok in self._tokenize(source):
+            self.tokens.append(tok)
+
+    def _tokenize(self, source):
+        """
+        Iterator that parses ``source``, splitting it into tokens, as defined
+        at ``RE_SCANNER_LIST``.
+
+        The iterator yields CToken class objects.
+        """
+
+        # Handle continuation lines. Note that kdoc_parser already has a
+        # logic to do that. Still, let's keep it for completeness, as we might
+        # end up re-using this tokenizer outside kernel-doc some day - or we
+        # may eventually remove it from there as a future cleanup.
+        source = RE_CONT.sub("", source)
+
+        brace_level = 0
+        paren_level = 0
+        bracket_level = 0
+
+        for match in RE_SCANNER.finditer(source):
+            kind = CToken.from_name(match.lastgroup)
+            pos = match.start()
+            value = match.group()
+
+            if kind == CToken.MISMATCH:
+                log.error(f"Unexpected token '{value}' on pos {pos}:\n\t'{source}'")
+            elif kind == CToken.BEGIN:
+                if value == '(':
+                    paren_level += 1
+                elif value == '[':
+                    bracket_level += 1
+                else:  # value == '{'
+                    brace_level += 1
+
+            elif kind == CToken.END:
+                if value == ')' and paren_level > 0:
+                    paren_level -= 1
+                elif value == ']' and bracket_level > 0:
+                    bracket_level -= 1
+                elif brace_level > 0:    # value == '}'
+                    brace_level -= 1
+
+            yield CToken(kind, value, pos,
+                         brace_level, paren_level, bracket_level)
+
+    def __str__(self):
+        out=""
+        show_stack = [True]
+
+        for i, tok in enumerate(self.tokens):
+            if tok.kind == CToken.BEGIN:
+                show_stack.append(show_stack[-1])
+
+            elif tok.kind == CToken.END:
+                prev = show_stack[-1]
+                if len(show_stack) > 1:
+                    show_stack.pop()
+
+                if not prev and show_stack[-1]:
+                    #
+                    # Try to preserve indent
+                    #
+                    out += "\t" * (len(show_stack) - 1)
+
+                    out += str(tok.value)
+                    continue
+
+            elif tok.kind == CToken.COMMENT:
+                comment = RE_COMMENT_START.sub("", tok.value)
+
+                if comment.startswith("private:"):
+                    show_stack[-1] = False
+                    show = False
+                elif comment.startswith("public:"):
+                    show_stack[-1] = True
+
+                continue
+
+            if not show_stack[-1]:
+                continue
+
+            if i < len(self.tokens) - 1:
+                next_tok = self.tokens[i + 1]
+
+                # Do some cleanups before ";"
+
+                if (tok.kind == CToken.SPACE and
+                    next_tok.kind == CToken.PUNC and
+                    next_tok.value == ";"):
+
+                    continue
+
+                if (tok.kind == CToken.PUNC and
+                    next_tok.kind == CToken.PUNC and
+                    tok.value == ";" and
+                    next_tok.kind == CToken.PUNC and
+                    next_tok.value == ";"):
+
+                    continue
+
+            out += str(tok.value)
+
+        return out
-- 
cgit v1.2.3


From cd77a9aa20ef53a03e5bb2630a5e7b16b910f198 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:26 +0100
Subject: docs: kdoc: use tokenizer to handle comments on structs

Better handle comments inside structs. After those changes,
all unittests now pass:

  test_private:
    TestPublicPrivate:
        test balanced_inner_private:                                 OK
        test balanced_non_greddy_private:                            OK
        test balanced_private:                                       OK
        test no private:                                             OK
        test unbalanced_inner_private:                               OK
        test unbalanced_private:                                     OK
        test unbalanced_struct_group_tagged_with_private:            OK
        test unbalanced_two_struct_group_tagged_first_with_private:  OK
        test unbalanced_without_end_of_line:                         OK

  Ran 9 tests

This also solves a bug when handling STRUCT_GROUP() with a private
comment on it:

	@@ -397134,7 +397134,7 @@ basic V4L2 device-level support.
	             unsigned int    max_len;
	             unsigned int    offset;
	             struct page_pool_params_slow  slow;
	-            STRUCT_GROUP( struct net_device *netdev;
	+            struct net_device *netdev;
	             unsigned int queue_idx;
	             unsigned int    flags;
	       };

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Message-ID: <f83ee9e8c38407eaab6ad10d4ccf155fb36683cc.1773074166.git.mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <054763260f7b5459ad0738ed906d7c358d640692.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 4b3c555e6c8e..62d8030cf532 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,6 +13,7 @@ import sys
 import re
 from pprint import pformat
 
+from kdoc.c_lex import CTokenizer
 from kdoc.kdoc_re import NestedMatch, KernRe
 from kdoc.kdoc_item import KdocItem
 
@@ -84,15 +85,9 @@ def trim_private_members(text):
     """
     Remove ``struct``/``enum`` members that have been marked "private".
     """
-    # First look for a "public:" block that ends a private region, then
-    # handle the "private until the end" case.
-    #
-    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
-    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
-    #
-    # We needed the comments to do the above, but now we can take them out.
-    #
-    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+    tokens = CTokenizer(text)
+    return str(tokens)
 
 class state:
     """
-- 
cgit v1.2.3


From f1cf9f7cd66f1f90c4c3beb0885b6f7771e1b419 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:30 +0100
Subject: docs: kdoc: create a CMatch to match nested C blocks

The NestedMatch code is complex, and will become even more complex
if we add there support for arguments.

Now that we have a tokenizer, we can use a better solution,
easier to be understood.

Yet, to improve performance, it is better to make it use a
previously tokenized code, changing its ABI.

So, reimplement NestedMatch using the CTokenizer class. Once it
is done, we can drop NestedMatch.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <fa818ea164216b17520b588e3f12b81499b76dd7.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py | 121 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 111 insertions(+), 10 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index 9d726f821f3f..5da472734ff7 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -273,20 +273,121 @@ class CTokenizer():
 
                 # Do some cleanups before ";"
 
-                if (tok.kind == CToken.SPACE and
-                    next_tok.kind == CToken.PUNC and
-                    next_tok.value == ";"):
-
+                if tok.kind == CToken.SPACE and next_tok.kind == CToken.ENDSTMT:
                     continue
 
-                if (tok.kind == CToken.PUNC and
-                    next_tok.kind == CToken.PUNC and
-                    tok.value == ";" and
-                    next_tok.kind == CToken.PUNC and
-                    next_tok.value == ";"):
-
+                if tok.kind == CToken.ENDSTMT and next_tok.kind == tok.kind:
                     continue
 
             out += str(tok.value)
 
         return out
+
+
+class CMatch:
+    """
+    Finding nested delimiters is hard with regular expressions. It is
+    even harder on Python with its normal re module, as there are several
+    advanced regular expressions that are missing.
+
+    This is the case of this pattern::
+
+            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
+
+    which is used to properly match open/close parentheses of the
+    string search STRUCT_GROUP(),
+
+    Add a class that counts pairs of delimiters, using it to match and
+    replace nested expressions.
+
+    The original approach was suggested by:
+
+        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+
+    Although I re-implemented it to make it more generic and match 3 types
+    of delimiters. The logic checks if delimiters are paired. If not, it
+    will ignore the search string.
+    """
+
+    # TODO: add a sub method
+
+    def __init__(self, regex):
+        self.regex = KernRe(regex)
+
+    def _search(self, tokenizer):
+        """
+        Finds paired blocks for a regex that ends with a delimiter.
+
+        The suggestion of using finditer to match pairs came from:
+        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+        but I ended using a different implementation to align all three types
+        of delimiters and seek for an initial regular expression.
+
+        The algorithm seeks for open/close paired delimiters and places them
+        into a stack, yielding a start/stop position of each match when the
+        stack is zeroed.
+
+        The algorithm should work fine for properly paired lines, but will
+        silently ignore end delimiters that precede a start delimiter.
+        This should be OK for kernel-doc parser, as unaligned delimiters
+        would cause compilation errors. So, we don't need to raise exceptions
+        to cover such issues.
+        """
+
+        start = None
+        offset = -1
+        started = False
+
+        import sys
+
+        stack = []
+
+        for i, tok in enumerate(tokenizer.tokens):
+            if start is None:
+                if tok.kind == CToken.NAME and self.regex.match(tok.value):
+                    start = i
+                    stack.append((start, tok.level))
+                    started = False
+
+                continue
+
+            if not started and tok.kind == CToken.BEGIN:
+                started = True
+                continue
+
+            if tok.kind == CToken.END and tok.level == stack[-1][1]:
+                start, level = stack.pop()
+                offset = i
+
+                yield CTokenizer(tokenizer.tokens[start:offset + 1])
+                start = None
+
+        #
+        # If an END zeroing levels is not there, return remaining stuff
+        # This is meant to solve cases where the caller logic might be
+        # picking an incomplete block.
+        #
+        if start and offset < 0:
+            print("WARNING: can't find an end", file=sys.stderr)
+            yield CTokenizer(tokenizer.tokens[start:])
+
+    def search(self, source):
+        """
+        This is similar to re.search:
+
+        It matches a regex that it is followed by a delimiter,
+        returning occurrences only if all delimiters are paired.
+        """
+
+        if isinstance(source, CTokenizer):
+            tokenizer = source
+            is_token = True
+        else:
+            tokenizer = CTokenizer(source)
+            is_token = False
+
+        for new_tokenizer in self._search(tokenizer):
+            if is_token:
+                yield new_tokenizer
+            else:
+                yield str(new_tokenizer)
-- 
cgit v1.2.3


From 9aaeb817ef4f794d1dbb8736332a64b5dae9521c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:32 +0100
Subject: docs: c_lex: properly implement a sub() method for CMatch

Implement a sub() method that does what is expected, parsing
backref arguments like \0, \1, \2, ...

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <dbc45b86db18783289d94cfdbba4b72792c47929.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py | 272 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 259 insertions(+), 13 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index 5da472734ff7..20e50ff0ecd5 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -16,6 +16,8 @@ Other errors are logged via log instance.
 import logging
 import re
 
+from copy import copy
+
 from .kdoc_re import KernRe
 
 log = logging.getLogger(__name__)
@@ -284,6 +286,172 @@ class CTokenizer():
         return out
 
 
+class CTokenArgs:
+    """
+    Ancillary class to help using backrefs from sub matches.
+
+    If the highest backref contain a "+" at the last element,
+    the logic will be greedy, picking all other delims.
+
+    This is needed to parse struct_group macros with end with ``MEMBERS...``.
+    """
+    def __init__(self, sub_str):
+        self.sub_groups = set()
+        self.max_group = -1
+        self.greedy = None
+
+        for m in KernRe(r'\\(\d+)([+]?)').finditer(sub_str):
+            group = int(m.group(1))
+            if m.group(2) == "+":
+                if self.greedy and self.greedy != group:
+                    raise ValueError("There are multiple greedy patterns!")
+                self.greedy = group
+
+            self.sub_groups.add(group)
+            self.max_group = max(self.max_group, group)
+
+        if self.greedy:
+            if self.greedy != self.max_group:
+                raise ValueError("Greedy pattern is not the last one!")
+
+            sub_str = KernRe(r'(\\\d+)[+]').sub(r"\1", sub_str)
+
+        self.sub_str = sub_str
+        self.sub_tokeninzer = CTokenizer(sub_str)
+
+    def groups(self, new_tokenizer):
+        """
+        Create replacement arguments for backrefs like:
+
+        ``\0``, ``\1``, ``\2``, ...``\n``
+
+        It also accepts a ``+`` character to the highest backref. When used,
+        it means in practice to ignore delimins after it, being greedy.
+
+        The logic is smart enough to only go up to the maximum required
+        argument, even if there are more.
+
+        If there is a backref for an argument above the limit, it will
+        raise an exception. Please notice that, on C, square brackets
+        don't have any separator on it. Trying to use ``\1``..``\n`` for
+        brackets also raise an exception.
+        """
+
+        level = (0, 0, 0)
+
+        if self.max_group < 0:
+            return level, []
+
+        tokens = new_tokenizer.tokens
+
+        #
+        # Fill \0 with the full token contents
+        #
+        groups_list = [ [] ]
+
+        if 0 in self.sub_groups:
+            inner_level = 0
+
+            for i in range(0, len(tokens)):
+                tok = tokens[i]
+
+                if tok.kind == CToken.BEGIN:
+                    inner_level += 1
+
+                    #
+                    # Discard first begin
+                    #
+                    if not groups_list[0]:
+                        continue
+                elif tok.kind == CToken.END:
+                    inner_level -= 1
+                    if inner_level < 0:
+                        break
+
+                if inner_level:
+                    groups_list[0].append(tok)
+
+        if not self.max_group:
+            return level, groups_list
+
+        delim = None
+
+        #
+        # Ignore everything before BEGIN. The value of begin gives the
+        # delimiter to be used for the matches
+        #
+        for i in range(0, len(tokens)):
+            tok = tokens[i]
+            if tok.kind == CToken.BEGIN:
+                if tok.value == "{":
+                    delim = ";"
+                elif tok.value == "(":
+                    delim = ","
+                else:
+                    self.log.error(fr"Can't handle \1..\n on {sub_str}")
+
+                level = tok.level
+                break
+
+        pos = 1
+        groups_list.append([])
+
+        inner_level = 0
+        for i in range(i + 1, len(tokens)):
+            tok = tokens[i]
+
+            if tok.kind == CToken.BEGIN:
+                inner_level += 1
+            if tok.kind == CToken.END:
+                inner_level -= 1
+                if inner_level < 0:
+                    break
+
+            if tok.kind in [CToken.PUNC, CToken.ENDSTMT] and delim == tok.value:
+                pos += 1
+                if self.greedy and pos > self.max_group:
+                    pos -= 1
+                else:
+                    groups_list.append([])
+
+                    if pos > self.max_group:
+                        break
+
+                    continue
+
+            groups_list[pos].append(tok)
+
+        if pos < self.max_group:
+            log.error(fr"{self.sub_str} groups are up to {pos} instead of {self.max_group}")
+
+        return level, groups_list
+
+    def tokens(self, new_tokenizer):
+        level, groups = self.groups(new_tokenizer)
+
+        new = CTokenizer()
+
+        for tok in self.sub_tokeninzer.tokens:
+            if tok.kind == CToken.BACKREF:
+                group = int(tok.value[1:])
+
+                for group_tok in groups[group]:
+                    new_tok = copy(group_tok)
+
+                    new_level = [0, 0, 0]
+
+                    for i in range(0, len(level)):
+                        new_level[i] = new_tok.level[i] + level[i]
+
+                    new_tok.level = tuple(new_level)
+
+                    new.tokens += [ new_tok ]
+            else:
+                new.tokens += [ tok ]
+
+        return new.tokens
+
+
 class CMatch:
     """
     Finding nested delimiters is hard with regular expressions. It is
@@ -309,10 +477,10 @@ class CMatch:
     will ignore the search string.
     """
 
-    # TODO: add a sub method
 
-    def __init__(self, regex):
-        self.regex = KernRe(regex)
+    def __init__(self, regex, delim="("):
+        self.regex = KernRe("^" + regex + r"\b")
+        self.start_delim = delim
 
     def _search(self, tokenizer):
         """
@@ -335,7 +503,6 @@ class CMatch:
         """
 
         start = None
-        offset = -1
         started = False
 
         import sys
@@ -351,15 +518,24 @@ class CMatch:
 
                 continue
 
-            if not started and tok.kind == CToken.BEGIN:
-                started = True
-                continue
+            if not started:
+                if tok.kind == CToken.SPACE:
+                    continue
+
+                if tok.kind == CToken.BEGIN and tok.value == self.start_delim:
+                    started = True
+                    continue
+
+                # Name only token without BEGIN/END
+                if i > start:
+                    i -= 1
+                yield start, i
+                start = None
 
             if tok.kind == CToken.END and tok.level == stack[-1][1]:
                 start, level = stack.pop()
-                offset = i
 
-                yield CTokenizer(tokenizer.tokens[start:offset + 1])
+                yield start, i
                 start = None
 
         #
@@ -367,9 +543,12 @@ class CMatch:
         # This is meant to solve cases where the caller logic might be
         # picking an incomplete block.
         #
-        if start and offset < 0:
-            print("WARNING: can't find an end", file=sys.stderr)
-            yield CTokenizer(tokenizer.tokens[start:])
+        if start and stack:
+            if started:
+                s = str(tokenizer)
+                log.warning(f"can't find a final end at {s}")
+
+            yield start, len(tokenizer.tokens)
 
     def search(self, source):
         """
@@ -386,8 +565,75 @@ class CMatch:
             tokenizer = CTokenizer(source)
             is_token = False
 
-        for new_tokenizer in self._search(tokenizer):
+        for start, end in self._search(tokenizer):
+            new_tokenizer = CTokenizer(tokenizer.tokens[start:end + 1])
+
             if is_token:
                 yield new_tokenizer
             else:
                 yield str(new_tokenizer)
+
+    def sub(self, sub_str, source, count=0):
+        """
+        This is similar to re.sub:
+
+        It matches a regex that it is followed by a delimiter,
+        replacing occurrences only if all delimiters are paired.
+
+        if the sub argument contains::
+
+            r'\0'
+
+        it will work just like re: it places there the matched paired data
+        with the delimiter stripped.
+
+        If count is different than zero, it will replace at most count
+        items.
+        """
+        if isinstance(source, CTokenizer):
+            is_token = True
+            tokenizer = source
+        else:
+            is_token = False
+            tokenizer = CTokenizer(source)
+
+        # Detect if sub_str contains sub arguments
+
+        args_match = CTokenArgs(sub_str)
+
+        new_tokenizer = CTokenizer()
+        pos = 0
+        n = 0
+
+        #
+        # NOTE: the code below doesn't consider overlays at sub.
+        # We may need to add some extra unit tests to check if those
+        # would cause problems. When replacing by "", this should not
+        # be a problem, but other transformations could be problematic
+        #
+        for start, end in self._search(tokenizer):
+            new_tokenizer.tokens += tokenizer.tokens[pos:start]
+
+            new = CTokenizer(tokenizer.tokens[start:end + 1])
+
+            new_tokenizer.tokens += args_match.tokens(new)
+
+            pos = end + 1
+
+            n += 1
+            if count and n >= count:
+                break
+
+        new_tokenizer.tokens += tokenizer.tokens[pos:]
+
+        if not is_token:
+            return str(new_tokenizer)
+
+        return new_tokenizer
+
+    def __repr__(self):
+        """
+        Returns a displayable version of the class init.
+        """
+
+        return f'CMatch("{self.regex.regex.pattern}")'
-- 
cgit v1.2.3


From 600079fdcf46fafe15b4ccd62804d66e05309cc6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:34 +0100
Subject: docs: kdoc: replace NestedMatch with CMatch

Our previous approach to handling nested structs was to use
NestedMatch. It works well, but adding support to parse delimiters
is very complex.

Instead, use CMatch, which uses a C tokenizer, making the code more
reliable and simpler.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <900bff66f8093402999f9fe055fbfa3fa33a8d8b.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  |  2 +-
 tools/lib/python/kdoc/xforms_lists.py | 31 ++++++++++++++++---------------
 2 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 62d8030cf532..efd58c88ff31 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -14,7 +14,7 @@ import re
 from pprint import pformat
 
 from kdoc.c_lex import CTokenizer
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.kdoc_re import KernRe
 from kdoc.kdoc_item import KdocItem
 
 #
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index c07cbe1e6349..7fa7f52cec7b 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -4,7 +4,8 @@
 
 import re
 
-from kdoc.kdoc_re import KernRe, NestedMatch
+from kdoc.kdoc_re import KernRe
+from kdoc.c_lex import CMatch
 
 struct_args_pattern = r'([^,)]+)'
 
@@ -60,7 +61,7 @@ class CTransforms:
         #
         # As it doesn't properly match the end parenthesis on some cases.
         #
-        # So, a better solution was crafted: there's now a NestedMatch
+        # So, a better solution was crafted: there's now a CMatch
         # class that ensures that delimiters after a search are properly
         # matched. So, the implementation to drop STRUCT_GROUP() will be
         # handled in separate.
@@ -72,9 +73,9 @@ class CTransforms:
         #
         # Replace macros
         #
-        # TODO: use NestedMatch for FOO($1, $2, ...) matches
+        # TODO: use CMatch for FOO($1, $2, ...) matches
         #
-        # it is better to also move those to the NestedMatch logic,
+        # it is better to also move those to the CMatch logic,
         # to ensure that parentheses will be properly matched.
         #
         (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
@@ -95,17 +96,17 @@ class CTransforms:
         (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
         (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
 
-        (NestedMatch(r"__cond_acquires\s*\("), ""),
-        (NestedMatch(r"__cond_releases\s*\("), ""),
-        (NestedMatch(r"__acquires\s*\("), ""),
-        (NestedMatch(r"__releases\s*\("), ""),
-        (NestedMatch(r"__must_hold\s*\("), ""),
-        (NestedMatch(r"__must_not_hold\s*\("), ""),
-        (NestedMatch(r"__must_hold_shared\s*\("), ""),
-        (NestedMatch(r"__cond_acquires_shared\s*\("), ""),
-        (NestedMatch(r"__acquires_shared\s*\("), ""),
-        (NestedMatch(r"__releases_shared\s*\("), ""),
-        (NestedMatch(r'\bSTRUCT_GROUP\('), r'\0'),
+        (CMatch(r"__cond_acquires"), ""),
+        (CMatch(r"__cond_releases"), ""),
+        (CMatch(r"__acquires"), ""),
+        (CMatch(r"__releases"), ""),
+        (CMatch(r"__must_hold"), ""),
+        (CMatch(r"__must_not_hold"), ""),
+        (CMatch(r"__must_hold_shared"), ""),
+        (CMatch(r"__cond_acquires_shared"), ""),
+        (CMatch(r"__acquires_shared"), ""),
+        (CMatch(r"__releases_shared"), ""),
+        (CMatch(r"STRUCT_GROUP"), r'\0'),
     ]
 
     #: Transforms for function prototypes.
-- 
cgit v1.2.3


From ae63a5b9203bcb3dce4819c07409b27734180eea Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:35 +0100
Subject: docs: kdoc_re: get rid of NestedMatch class

Now that everything was converted to CMatch, we can get rid of
the previous NestedMatch implementation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <c82dd0d2c0ab330fc04925965091c448ccabb8fd.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 201 ---------------------------------------
 1 file changed, 201 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 085b89a4547c..6f3ae28859ea 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -140,204 +140,3 @@ class KernRe:
         """
 
         return self.last_match.groups()
-
-#: Nested delimited pairs (brackets and parenthesis)
-DELIMITER_PAIRS = {
-    '{': '}',
-    '(': ')',
-    '[': ']',
-}
-
-#: compiled delimiters
-RE_DELIM = KernRe(r'[\{\}\[\]\(\)]')
-
-
-class NestedMatch:
-    """
-    Finding nested delimiters is hard with regular expressions. It is
-    even harder on Python with its normal re module, as there are several
-    advanced regular expressions that are missing.
-
-    This is the case of this pattern::
-
-            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
-
-    which is used to properly match open/close parentheses of the
-    string search STRUCT_GROUP(),
-
-    Add a class that counts pairs of delimiters, using it to match and
-    replace nested expressions.
-
-    The original approach was suggested by:
-
-        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
-
-    Although I re-implemented it to make it more generic and match 3 types
-    of delimiters. The logic checks if delimiters are paired. If not, it
-    will ignore the search string.
-    """
-
-    # TODO: make NestedMatch handle multiple match groups
-    #
-    # Right now, regular expressions to match it are defined only up to
-    #       the start delimiter, e.g.:
-    #
-    #       \bSTRUCT_GROUP\(
-    #
-    # is similar to: STRUCT_GROUP\((.*)\)
-    # except that the content inside the match group is delimiter-aligned.
-    #
-    # The content inside parentheses is converted into a single replace
-    # group (e.g. r`\0').
-    #
-    # It would be nice to change such definition to support multiple
-    # match groups, allowing a regex equivalent to:
-    #
-    #   FOO\((.*), (.*), (.*)\)
-    #
-    # it is probably easier to define it not as a regular expression, but
-    # with some lexical definition like:
-    #
-    #   FOO(arg1, arg2, arg3)
-
-    def __init__(self, regex):
-        self.regex = KernRe(regex)
-
-    def _search(self, line):
-        """
-        Finds paired blocks for a regex that ends with a delimiter.
-
-        The suggestion of using finditer to match pairs came from:
-        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
-        but I ended using a different implementation to align all three types
-        of delimiters and seek for an initial regular expression.
-
-        The algorithm seeks for open/close paired delimiters and places them
-        into a stack, yielding a start/stop position of each match when the
-        stack is zeroed.
-
-        The algorithm should work fine for properly paired lines, but will
-        silently ignore end delimiters that precede a start delimiter.
-        This should be OK for kernel-doc parser, as unaligned delimiters
-        would cause compilation errors. So, we don't need to raise exceptions
-        to cover such issues.
-        """
-
-        stack = []
-
-        for match_re in self.regex.finditer(line):
-            start = match_re.start()
-            offset = match_re.end()
-            string_char = None
-            escape = False
-
-            d = line[offset - 1]
-            if d not in DELIMITER_PAIRS:
-                continue
-
-            end = DELIMITER_PAIRS[d]
-            stack.append(end)
-
-            for match in RE_DELIM.finditer(line[offset:]):
-                pos = match.start() + offset
-
-                d = line[pos]
-
-                if escape:
-                    escape = False
-                    continue
-
-                if string_char:
-                    if d == '\\':
-                        escape = True
-                    elif d == string_char:
-                        string_char = None
-
-                    continue
-
-                if d in ('"', "'"):
-                    string_char = d
-                    continue
-
-                if d in DELIMITER_PAIRS:
-                    end = DELIMITER_PAIRS[d]
-
-                    stack.append(end)
-                    continue
-
-                # Does the end delimiter match what is expected?
-                if stack and d == stack[-1]:
-                    stack.pop()
-
-                    if not stack:
-                        yield start, offset, pos + 1
-                        break
-
-    def search(self, line):
-        """
-        This is similar to re.search:
-
-        It matches a regex that it is followed by a delimiter,
-        returning occurrences only if all delimiters are paired.
-        """
-
-        for t in self._search(line):
-
-            yield line[t[0]:t[2]]
-
-    def sub(self, sub, line, count=0):
-        """
-        This is similar to re.sub:
-
-        It matches a regex that it is followed by a delimiter,
-        replacing occurrences only if all delimiters are paired.
-
-        if the sub argument contains::
-
-            r'\0'
-
-        it will work just like re: it places there the matched paired data
-        with the delimiter stripped.
-
-        If count is different than zero, it will replace at most count
-        items.
-        """
-        out = ""
-
-        cur_pos = 0
-        n = 0
-
-        for start, end, pos in self._search(line):
-            out += line[cur_pos:start]
-
-            # Value, ignoring start/end delimiters
-            value = line[end:pos - 1]
-
-            # replaces \0 at the sub string, if \0 is used there
-            new_sub = sub
-            new_sub = new_sub.replace(r'\0', value)
-
-            out += new_sub
-
-            # Drop end ';' if any
-            if pos < len(line) and line[pos] == ';':
-                pos += 1
-
-            cur_pos = pos
-            n += 1
-
-            if count and count >= n:
-                break
-
-        # Append the remaining string
-        l = len(line)
-        out += line[cur_pos:l]
-
-        return out
-
-    def __repr__(self):
-        """
-        Returns a displayable version of the class init.
-        """
-
-        return f'NestedMatch("{self.regex.regex.pattern}")'
-- 
cgit v1.2.3


From f63e6163c7e4f988b2ff35721ffc86b95425293f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:36 +0100
Subject: docs: xforms_lists: handle struct_group directly

The previous logic handled struct_group in two steps.
Remove that approach, as CMatch can do it the right
way in a single step.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <da7f879d90e3ffbc1f47771522f212a60df1fab6.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/xforms_lists.py | 53 ++++-------------------------------
 1 file changed, 6 insertions(+), 47 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index 7fa7f52cec7b..98632c50a146 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -32,52 +32,6 @@ class CTransforms:
         (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
         (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
         (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
-        #
-        # Unwrap struct_group macros based on this definition:
-        # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
-        # which has variants like: struct_group(NAME, MEMBERS...)
-        # Only MEMBERS arguments require documentation.
-        #
-        # Parsing them happens on two steps:
-        #
-        # 1. drop struct group arguments that aren't at MEMBERS,
-        #    storing them as STRUCT_GROUP(MEMBERS)
-        #
-        # 2. remove STRUCT_GROUP() ancillary macro.
-        #
-        # The original logic used to remove STRUCT_GROUP() using an
-        # advanced regex:
-        #
-        #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
-        #
-        # with two patterns that are incompatible with
-        # Python re module, as it has:
-        #
-        #   - a recursive pattern: (?1)
-        #   - an atomic grouping: (?>...)
-        #
-        # I tried a simpler version: but it didn't work either:
-        #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
-        #
-        # As it doesn't properly match the end parenthesis on some cases.
-        #
-        # So, a better solution was crafted: there's now a CMatch
-        # class that ensures that delimiters after a search are properly
-        # matched. So, the implementation to drop STRUCT_GROUP() will be
-        # handled in separate.
-        #
-        (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
-        (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
-        (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
-        (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
-        #
-        # Replace macros
-        #
-        # TODO: use CMatch for FOO($1, $2, ...) matches
-        #
-        # it is better to also move those to the CMatch logic,
-        # to ensure that parentheses will be properly matched.
-        #
         (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
         r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
         (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
@@ -106,7 +60,12 @@ class CTransforms:
         (CMatch(r"__cond_acquires_shared"), ""),
         (CMatch(r"__acquires_shared"), ""),
         (CMatch(r"__releases_shared"), ""),
-        (CMatch(r"STRUCT_GROUP"), r'\0'),
+
+        (CMatch('struct_group'), r'\2'),
+        (CMatch('struct_group_attr'), r'\3'),
+        (CMatch('struct_group_tagged'), r'struct \1 \2; \3'),
+        (CMatch('__struct_group'), r'\4'),
+
     ]
 
     #: Transforms for function prototypes.
-- 
cgit v1.2.3


From 2f07ddbd5793df4ec24f727322cc68065feb3568 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:37 +0100
Subject: docs: xforms_lists: better evaluate struct_group macros

The previous approach was to unwind nested structs/unions.

Now that we have a logic that can handle it well, use it to
ensure that struct_group macros will properly reflect the
actual struct.

Note that the replacement logic still simplifies the code
a little bit, as the basic building block for struct group is:

	union { \
		struct { MEMBERS } ATTRS; \
		struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
	} ATTRS

There:

- ATTRS is meant to add extra macro attributes like __packed
  which we already discard, as they aren't relevant to
  document struct members;

- TAG is used only when built with __cplusplus.

So, instead, convert them into just:

    struct { MEMBERS };

Please notice that here, we're using the greedy version of the
backrefs, as MEMBERS is actually MEMBERS... on all such macros.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <24bf2c036b08814d9b4aabc27542fd3b2ff54424.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/xforms_lists.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index 98632c50a146..2056572852fd 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -61,10 +61,16 @@ class CTransforms:
         (CMatch(r"__acquires_shared"), ""),
         (CMatch(r"__releases_shared"), ""),
 
-        (CMatch('struct_group'), r'\2'),
-        (CMatch('struct_group_attr'), r'\3'),
-        (CMatch('struct_group_tagged'), r'struct \1 \2; \3'),
-        (CMatch('__struct_group'), r'\4'),
+        #
+        # Macro __struct_group() creates an union with an anonymous
+        # and a non-anonymous struct, depending on the parameters. We only
+        # need one of those at kernel-doc, as we won't be documenting the same
+        # members twice.
+        #
+        (CMatch('struct_group'), r'struct { \2+ };'),
+        (CMatch('struct_group_attr'), r'struct { \3+ };'),
+        (CMatch('struct_group_tagged'), r'struct { \3+ };'),
+        (CMatch('__struct_group'), r'struct { \4+ };'),
 
     ]
 
-- 
cgit v1.2.3


From 024e200e2a89d71dceff7d1bff8ae77b145726e0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:38 +0100
Subject: docs: c_lex: setup a logger to report tokenizer issues

Report file that has issues detected via CMatch and CTokenizer.

This is done by setting up a logger that will be overridden by
kdoc_parser, when used on it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <903ad83ae176196a50444e66177a4f5bcdef5199.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py       | 16 ++++++++++++++++
 tools/lib/python/kdoc/kdoc_parser.py |  4 +++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index 20e50ff0ecd5..b6d58bd470a9 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -22,6 +22,22 @@ from .kdoc_re import KernRe
 
 log = logging.getLogger(__name__)
 
+def tokenizer_set_log(logger, prefix = ""):
+    """
+    Replace the module‑level logger with a LoggerAdapter that
+    prepends *prefix* to every message.
+    """
+    global log
+
+    class PrefixAdapter(logging.LoggerAdapter):
+        """
+        Ancillary class to set prefix on all message logs.
+        """
+        def process(self, msg, kwargs):
+            return f"{prefix}{msg}", kwargs
+
+    # Wrap the provided logger in our adapter
+    log = PrefixAdapter(logger, {"prefix": prefix})
 
 class CToken():
     """
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index efd58c88ff31..f90c6dd0343d 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,7 @@ import sys
 import re
 from pprint import pformat
 
-from kdoc.c_lex import CTokenizer
+from kdoc.c_lex import CTokenizer, tokenizer_set_log
 from kdoc.kdoc_re import KernRe
 from kdoc.kdoc_item import KdocItem
 
@@ -253,6 +253,8 @@ class KernelDoc:
         self.config = config
         self.xforms = xforms
 
+        tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
+
         # Initial state for the state machines
         self.state = state.NORMAL
 
-- 
cgit v1.2.3


From 12aa7753ff4c5fea405d139bcf67f49bda2c932e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:40 +0100
Subject: docs: kdoc: ensure that comments are dropped before calling
 split_struct_proto()

Changeset 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes")
revealed a hidden bug at split_struct_proto(): some comments there may break
its capability of properly identifying a struct.

Fixing it is as simple as stripping comments before calling it.

Fixes: 2b957decdb6c ("docs: kdoc: don't add broken comments inside prototypes")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <dcff37b6da5329aea415de31f543b6a1c2cbbbce.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index f90c6dd0343d..8b2c9d0f0c58 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -723,6 +723,7 @@ class KernelDoc:
         #
         # Do the basic parse to get the pieces of the declaration.
         #
+        proto = trim_private_members(proto)
         struct_parts = self.split_struct_proto(proto)
         if not struct_parts:
             self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
@@ -763,6 +764,7 @@ class KernelDoc:
         # Strip preprocessor directives.  Note that this depends on the
         # trailing semicolon we added in process_proto_type().
         #
+        proto = trim_private_members(proto)
         proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
         #
         # Parse out the name and members of the enum.  Typedef form first.
@@ -770,7 +772,7 @@ class KernelDoc:
         r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
         if r.search(proto):
             declaration_name = r.group(2)
-            members = trim_private_members(r.group(1))
+            members = r.group(1)
         #
         # Failing that, look for a straight enum
         #
@@ -778,7 +780,7 @@ class KernelDoc:
             r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
             if r.match(proto):
                 declaration_name = r.group(1)
-                members = trim_private_members(r.group(2))
+                members = r.group(2)
         #
         # OK, this isn't going to work.
         #
-- 
cgit v1.2.3


From 79d881beb721d27f679f0dc1cba2d5fe2d7f6d8d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:41 +0100
Subject: docs: kdoc_parser: avoid tokenizing structs everytime

Most of the rules inside CTransforms are of the type CMatch.

Don't re-parse the source code every time.

Doing this doesn't change the output, but makes kdoc almost
as fast as before the tokenizer patches:

    # Before tokenizer patches
    $ time ./scripts/kernel-doc . -man >original 2>&1

    real    0m42.933s
    user    0m36.523s
    sys     0m1.145s

    # After tokenizer patches
    $ time ./scripts/kernel-doc . -man >before 2>&1

    real    1m29.853s
    user    1m23.974s
    sys     0m1.237s

    # After this patch
    $ time ./scripts/kernel-doc . -man >after 2>&1

    real    0m48.579s
    user    0m45.938s
    sys     0m0.988s

    $ diff -s before after
    Files before and after are identical

Manually checked the differences between original and after
with:

    $ diff -U0 -prBw original after|grep -v Warning|grep -v "@@"|less

They're due to:
  - whitespace fixes;
  - struct_group are now better handled;
  - several badly-generated man pages from broken inline kernel-doc
    markups are now fixed.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <1cc2a4286ebf7d4b2d03fcaf42a1ba9fa09004b9.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_parser.py  |  1 -
 tools/lib/python/kdoc/xforms_lists.py | 30 ++++++++++++++++++++++++------
 2 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 8b2c9d0f0c58..f6c4ee3b18c9 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -737,7 +737,6 @@ class KernelDoc:
         #
         # Go through the list of members applying all of our transformations.
         #
-        members = trim_private_members(members)
         members = self.xforms.apply("struct", members)
 
         #
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index 2056572852fd..5a62d4a450cb 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -5,7 +5,7 @@
 import re
 
 from kdoc.kdoc_re import KernRe
-from kdoc.c_lex import CMatch
+from kdoc.c_lex import CMatch, CTokenizer
 
 struct_args_pattern = r'([^,)]+)'
 
@@ -16,6 +16,12 @@ class CTransforms:
     into something we can parse and generate kdoc for.
     """
 
+    #
+    # NOTE:
+    #      Due to performance reasons, place CMatch rules before KernRe,
+    #      as this avoids running the C parser every time.
+    #
+
     #: Transforms for structs and unions.
     struct_xforms = [
         # Strip attributes
@@ -124,13 +130,25 @@ class CTransforms:
         "var": var_xforms,
     }
 
-    def apply(self, xforms_type, text):
+    def apply(self, xforms_type, source):
         """
-        Apply a set of transforms to a block of text.
+        Apply a set of transforms to a block of source.
+
+        As tokenizer is used here, this function also remove comments
+        at the end.
         """
         if xforms_type not in self.xforms:
-            return text
+            return source
+
+        if isinstance(source, str):
+            source = CTokenizer(source)
 
         for search, subst in self.xforms[xforms_type]:
-            text = search.sub(subst, text)
-        return text
+            #
+            # KernRe only accept strings.
+            #
+            if isinstance(search, KernRe):
+                source = str(source)
+
+            source = search.sub(subst, source)
+        return str(source)
-- 
cgit v1.2.3


From 7538df7a2d7d26428803cf8053476169a6d28659 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 17 Mar 2026 19:09:42 +0100
Subject: docs: xforms_lists: use CMatch for all identifiers

CMatch is lexically correct and replaces only identifiers,
which is exactly where macro transformations happen.

Use it to make the output safer and ensure that all arguments
will be parsed the right way, even on complex cases.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <86d4a07ff0e054207747fabf38d6bb261b52b5fa.1773770483.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/xforms_lists.py | 159 +++++++++++++++++-----------------
 1 file changed, 79 insertions(+), 80 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
index 5a62d4a450cb..f6ea9efb11ae 100644
--- a/tools/lib/python/kdoc/xforms_lists.py
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -7,7 +7,8 @@ import re
 from kdoc.kdoc_re import KernRe
 from kdoc.c_lex import CMatch, CTokenizer
 
-struct_args_pattern = r'([^,)]+)'
+struct_args_pattern = r"([^,)]+)"
+
 
 class CTransforms:
     """
@@ -24,48 +25,40 @@ class CTransforms:
 
     #: Transforms for structs and unions.
     struct_xforms = [
-        # Strip attributes
-        (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
-        (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
-        (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
-        (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
-        (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '),
-        (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '),
-        (KernRe(r'\s*__packed\s*', re.S), ' '),
-        (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
-        (KernRe(r'\s*__private', re.S), ' '),
-        (KernRe(r'\s*__rcu', re.S), ' '),
-        (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
-        (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
-        (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
-        (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
-        r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
-        (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
-        r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
-        (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
-                re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
-        (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
-                re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
-        (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
-                r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-        (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
-                struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-        (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
-                struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
-        (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
-        (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
-        (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
-
-        (CMatch(r"__cond_acquires"), ""),
-        (CMatch(r"__cond_releases"), ""),
-        (CMatch(r"__acquires"), ""),
-        (CMatch(r"__releases"), ""),
-        (CMatch(r"__must_hold"), ""),
-        (CMatch(r"__must_not_hold"), ""),
-        (CMatch(r"__must_hold_shared"), ""),
-        (CMatch(r"__cond_acquires_shared"), ""),
-        (CMatch(r"__acquires_shared"), ""),
-        (CMatch(r"__releases_shared"), ""),
+        (CMatch("__attribute__"), ""),
+        (CMatch("__aligned"), ""),
+        (CMatch("__counted_by"), ""),
+        (CMatch("__counted_by_(le|be)"), ""),
+        (CMatch("__guarded_by"), ""),
+        (CMatch("__pt_guarded_by"), ""),
+        (CMatch("__packed"), ""),
+        (CMatch("CRYPTO_MINALIGN_ATTR"), ""),
+        (CMatch("__private"), ""),
+        (CMatch("__rcu"), ""),
+        (CMatch("____cacheline_aligned_in_smp"), ""),
+        (CMatch("____cacheline_aligned"), ""),
+        (CMatch("__cacheline_group_(?:begin|end)"), ""),
+        (CMatch("__ETHTOOL_DECLARE_LINK_MODE_MASK"), r"DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)"),
+        (CMatch("DECLARE_PHY_INTERFACE_MASK",),r"DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)"),
+        (CMatch("DECLARE_BITMAP"), r"unsigned long \1[BITS_TO_LONGS(\2)]"),
+        (CMatch("DECLARE_HASHTABLE"), r"unsigned long \1[1 << ((\2) - 1)]"),
+        (CMatch("DECLARE_KFIFO"), r"\2 *\1"),
+        (CMatch("DECLARE_KFIFO_PTR"), r"\2 *\1"),
+        (CMatch("(?:__)?DECLARE_FLEX_ARRAY"), r"\1 \2[]"),
+        (CMatch("DEFINE_DMA_UNMAP_ADDR"), r"dma_addr_t \1"),
+        (CMatch("DEFINE_DMA_UNMAP_LEN"), r"__u32 \1"),
+        (CMatch("VIRTIO_DECLARE_FEATURES"), r"union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }"),
+        (CMatch("__cond_acquires"), ""),
+        (CMatch("__cond_releases"), ""),
+        (CMatch("__acquires"), ""),
+        (CMatch("__releases"), ""),
+        (CMatch("__must_hold"), ""),
+        (CMatch("__must_not_hold"), ""),
+        (CMatch("__must_hold_shared"), ""),
+        (CMatch("__cond_acquires_shared"), ""),
+        (CMatch("__acquires_shared"), ""),
+        (CMatch("__releases_shared"), ""),
+        (CMatch("__attribute__"), ""),
 
         #
         # Macro __struct_group() creates an union with an anonymous
@@ -73,51 +66,57 @@ class CTransforms:
         # need one of those at kernel-doc, as we won't be documenting the same
         # members twice.
         #
-        (CMatch('struct_group'), r'struct { \2+ };'),
-        (CMatch('struct_group_attr'), r'struct { \3+ };'),
-        (CMatch('struct_group_tagged'), r'struct { \3+ };'),
-        (CMatch('__struct_group'), r'struct { \4+ };'),
-
+        (CMatch("struct_group"), r"struct { \2+ };"),
+        (CMatch("struct_group_attr"), r"struct { \3+ };"),
+        (CMatch("struct_group_tagged"), r"struct { \3+ };"),
+        (CMatch("__struct_group"), r"struct { \4+ };"),
     ]
 
     #: Transforms for function prototypes.
     function_xforms = [
-        (KernRe(r"^static +"), ""),
-        (KernRe(r"^extern +"), ""),
-        (KernRe(r"^asmlinkage +"), ""),
-        (KernRe(r"^inline +"), ""),
-        (KernRe(r"^__inline__ +"), ""),
-        (KernRe(r"^__inline +"), ""),
-        (KernRe(r"^__always_inline +"), ""),
-        (KernRe(r"^noinline +"), ""),
-        (KernRe(r"^__FORTIFY_INLINE +"), ""),
-        (KernRe(r"__init +"), ""),
-        (KernRe(r"__init_or_module +"), ""),
-        (KernRe(r"__exit +"), ""),
-        (KernRe(r"__deprecated +"), ""),
-        (KernRe(r"__flatten +"), ""),
-        (KernRe(r"__meminit +"), ""),
-        (KernRe(r"__must_check +"), ""),
-        (KernRe(r"__weak +"), ""),
-        (KernRe(r"__sched +"), ""),
-        (KernRe(r"_noprof"), ""),
-        (KernRe(r"__always_unused *"), ""),
-        (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
-        (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
-        (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
-        (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
-        (KernRe(r"__no_context_analysis\s*"), ""),
-        (KernRe(r"__attribute_const__ +"), ""),
-        (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
+        (CMatch("static"), ""),
+        (CMatch("extern"), ""),
+        (CMatch("asmlinkage"), ""),
+        (CMatch("inline"), ""),
+        (CMatch("__inline__"), ""),
+        (CMatch("__inline"), ""),
+        (CMatch("__always_inline"), ""),
+        (CMatch("noinline"), ""),
+        (CMatch("__FORTIFY_INLINE"), ""),
+        (CMatch("__init"), ""),
+        (CMatch("__init_or_module"), ""),
+        (CMatch("__exit"), ""),
+        (CMatch("__deprecated"), ""),
+        (CMatch("__flatten"), ""),
+        (CMatch("__meminit"), ""),
+        (CMatch("__must_check"), ""),
+        (CMatch("__weak"), ""),
+        (CMatch("__sched"), ""),
+        (CMatch("__always_unused"), ""),
+        (CMatch("__printf"), ""),
+        (CMatch("__(?:re)?alloc_size"), ""),
+        (CMatch("__diagnose_as"), ""),
+        (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"),
+        (CMatch("__no_context_analysis"), ""),
+        (CMatch("__attribute_const__"), ""),
+        (CMatch("__attribute__"), ""),
+
+        #
+        # HACK: this is similar to process_export() hack. It is meant to
+        # drop _noproof from function name. See for instance:
+        # ahash_request_alloc kernel-doc declaration at include/crypto/hash.h.
+        #
+        (KernRe("_noprof"), ""),
     ]
 
     #: Transforms for variable prototypes.
     var_xforms = [
-        (KernRe(r"__read_mostly"), ""),
-        (KernRe(r"__ro_after_init"), ""),
-        (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""),
-        (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""),
-        (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
+        (CMatch("__read_mostly"), ""),
+        (CMatch("__ro_after_init"), ""),
+        (CMatch("__guarded_by"), ""),
+        (CMatch("__pt_guarded_by"), ""),
+        (CMatch("LIST_HEAD"), r"struct list_head \1"),
+
         (KernRe(r"(?://.*)$"), ""),
         (KernRe(r"(?:/\*.*\*/)"), ""),
         (KernRe(r";$"), ""),
-- 
cgit v1.2.3


From b2d231f4a77800661b3fb812d997841a548c6526 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:02 +0100
Subject: docs: kdoc_re: better represent long regular expressions

The Sphinx output from autodoc doesn't automatically break long
lines, except on spaces.

Change KernRe __repr__() to break the pattern on multiple strings,
each one with a maximum limit of 60 characters.

With that, documentation output for KernRe should now be displayable,
even on long strings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <60c264a9d277fed655b1a62df2195562c8596090.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_re.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 6f3ae28859ea..28292efe25a2 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -70,10 +70,15 @@ class KernRe:
 
         flags_name = " | ".join(flags)
 
+        max_len = 60
+        pattern = ""
+        for pos in range(0, len(self.regex.pattern), max_len):
+            pattern += '"' + self.regex.pattern[pos:max_len + pos] + '" '
+
         if flags_name:
-            return f'KernRe("{self.regex.pattern}", {flags_name})'
+            return f'KernRe({pattern}, {flags_name})'
         else:
-            return f'KernRe("{self.regex.pattern}")'
+            return f'KernRe({pattern})'
 
     def __add__(self, other):
         """
-- 
cgit v1.2.3


From 8c0b7c0d3c0e640b3ebb7f1f648ea322e56c227a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:03 +0100
Subject: docs: kdoc: add c_lex to generated documentation

Do some fixes at groups() description for it to be parsed by
Sphinx and add it to the documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <799178cf30dd4022fdb1d029ba998a458e037b52.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index b6d58bd470a9..e01b154f458e 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -336,13 +336,14 @@ class CTokenArgs:
         self.sub_tokeninzer = CTokenizer(sub_str)
 
     def groups(self, new_tokenizer):
-        """
+        r"""
         Create replacement arguments for backrefs like:
 
-        ``\0``, ``\1``, ``\2``, ...``\n``
+        ``\0``, ``\1``, ``\2``, ... ``\{number}``
 
-        It also accepts a ``+`` character to the highest backref. When used,
-        it means in practice to ignore delimins after it, being greedy.
+        It also accepts a ``+`` character to the highest backref, like
+        ``\4+``. When used, the backref will be greedy, picking all other
+        arguments afterwards.
 
         The logic is smart enough to only go up to the maximum required
         argument, even if there are more.
-- 
cgit v1.2.3


From e0ebee442d56c11df023b7c2d32edc3b0765b2f3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:04 +0100
Subject: docs: kdoc_files: use a class to group config parameters

Instead of abusing argparse.Namespace, define a class to store
configuration parameters and logger.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <a66ec9872c72a3ba1a5ac567881d67dc8ee581c6.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py | 45 +++++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 14 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 8c2059623949..1c5cb9e5f0e8 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -9,7 +9,6 @@ Classes for navigating through the files that kernel-doc needs to handle
 to generate documentation.
 """
 
-import argparse
 import logging
 import os
 import re
@@ -87,6 +86,28 @@ class GlobSourceFiles:
                 file_not_found_cb(fname)
 
 
+class KdocConfig():
+    """
+    Stores all configuration attributes that kdoc_parser and kdoc_output
+    needs.
+    """
+    def __init__(self, verbose=False, werror=False, wreturn=False,
+                 wshort_desc=False, wcontents_before_sections=False,
+                 logger=None):
+
+        self.verbose = verbose
+        self.werror = werror
+        self.wreturn = wreturn
+        self.wshort_desc =  wshort_desc
+        self.wcontents_before_sections = wcontents_before_sections
+
+        if logger:
+            self.log = logger
+        else:
+            self.log = logging.getLogger(__file__)
+
+        self.warning = self.log.warning
+
 class KernelFiles():
     """
     Parse kernel-doc tags on multiple kernel source files.
@@ -224,29 +245,25 @@ class KernelFiles():
             if kdoc_werror:
                 werror = kdoc_werror
 
+        if not logger:
+           logger = logging.getLogger("kernel-doc")
+        else:
+            logger = logger
+
         # Some variables are global to the parser logic as a whole as they are
         # used to send control configuration to KernelDoc class. As such,
         # those variables are read-only inside the KernelDoc.
-        self.config = argparse.Namespace
+        self.config = KdocConfig(verbose, werror, wreturn, wshort_desc,
+                                 wcontents_before_sections, logger)
 
-        self.config.verbose = verbose
-        self.config.werror = werror
-        self.config.wreturn = wreturn
-        self.config.wshort_desc = wshort_desc
-        self.config.wcontents_before_sections = wcontents_before_sections
+        # Override log warning, as we want to count errors
+        self.config.warning = self.warning
 
         if xforms:
             self.xforms = xforms
         else:
             self.xforms = CTransforms()
 
-        if not logger:
-            self.config.log = logging.getLogger("kernel-doc")
-        else:
-            self.config.log = logger
-
-        self.config.warning = self.warning
-
         self.config.src_tree = os.environ.get("SRCTREE", None)
 
         # Initialize variables that are internal to KernelFiles
-- 
cgit v1.2.3


From 9ab2ca3dd127194a55bd9789c031e800fd19c254 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:05 +0100
Subject: docs: kdoc_files: move output symbols logic to kdoc_output

When writing unittests for kdoc_output, it became clear that
the logic which handles a series of KdocItem symbols from
a single file belongs to kdoc_output, and not to kdoc_files.

Move the code to it.

While here, also ensure that self.config will be placed
together with self.out_style.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <4ebc26e37a0b544c50d50b8077760f147fa6a535.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py  | 18 ++----------------
 tools/lib/python/kdoc/kdoc_output.py | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 1c5cb9e5f0e8..58f4ee08e226 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -269,6 +269,7 @@ class KernelFiles():
         # Initialize variables that are internal to KernelFiles
 
         self.out_style = out_style
+        self.out_style.set_config(self.config)
 
         self.errors = 0
         self.results = {}
@@ -311,8 +312,6 @@ class KernelFiles():
         returning kernel-doc markups on each interaction.
         """
 
-        self.out_style.set_config(self.config)
-
         if not filenames:
             filenames = sorted(self.results.keys())
 
@@ -336,25 +335,12 @@ class KernelFiles():
                                       function_table, enable_lineno,
                                       no_doc_sections)
 
-            msg = ""
             if fname not in self.results:
                 self.config.log.warning("No kernel-doc for file %s", fname)
                 continue
 
             symbols = self.results[fname]
-            self.out_style.set_symbols(symbols)
-
-            for arg in symbols:
-                m = self.out_msg(fname, arg.name, arg)
-
-                if m is None:
-                    ln = arg.get("ln", 0)
-                    dtype = arg.get('type', "")
-
-                    self.config.log.warning("%s:%d Can't handle %s",
-                                            fname, ln, dtype)
-                else:
-                    msg += m
 
+            msg = self.out_style.output_symbols(fname, symbols)
             if msg:
                 yield fname, msg
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 08539dd92cbb..73d71cbeabb5 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -222,6 +222,27 @@ class OutputFormat:
 
         return None
 
+    def output_symbols(self, fname, symbols):
+        """
+        Handles a set of KdocItem symbols.
+        """
+        self.set_symbols(symbols)
+
+        msg = ""
+        for arg in symbols:
+            m = self.msg(fname, arg.name, arg)
+
+            if m is None:
+                ln = arg.get("ln", 0)
+                dtype = arg.get('type', "")
+
+                self.config.log.warning("%s:%d Can't handle %s",
+                                        fname, ln, dtype)
+            else:
+                msg += m
+
+        return msg
+
     # Virtual methods to be overridden by inherited classes
     # At the base class, those do nothing.
     def set_symbols(self, symbols):
-- 
cgit v1.2.3


From 01c41b99c66ff26a102edbc4f9dff9c74692723e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:06 +0100
Subject: docs: kdoc_item: fix initial value for parameterdesc_start_lines

Ensure that parameterdesc_start_lines is a dict at init time,
as this is how it will be set later on at the parser.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <1b4ea24dd4cd82e6711e9be80168684427d74c30.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_item.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
index 2b8a93f79716..c0585cdbcbd1 100644
--- a/tools/lib/python/kdoc/kdoc_item.py
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -22,7 +22,7 @@ class KdocItem:
         self.sections = {}
         self.sections_start_lines = {}
         self.parameterlist = []
-        self.parameterdesc_start_lines = []
+        self.parameterdesc_start_lines = {}
         self.parameterdescs = {}
         self.parametertypes = {}
         #
-- 
cgit v1.2.3


From 99364ba7f8dca5c1c2d08fe37c5835b86be141f4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:07 +0100
Subject: docs: kdoc_item: add support to generate a KdocItem from a dict

When reading the contents of a KdocItem using YAML, the data
will be imported into a dict.

Add a method to create a new KdocItem from a dict to allow
converting such input into a real KdocItem.

While here, address an issue that, if the class is initialized
with an internal parameter outside the 4 initial arguments,
it would end up being added inside other_stuff, which breaks
initializing it from a dict.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <fafeac23d1577927e1a3c32cddfbec1e0209ac73.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_item.py | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
index c0585cdbcbd1..5f41790efacb 100644
--- a/tools/lib/python/kdoc/kdoc_item.py
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -25,12 +25,31 @@ class KdocItem:
         self.parameterdesc_start_lines = {}
         self.parameterdescs = {}
         self.parametertypes = {}
+
+        self.warnings = []
+
         #
         # Just save everything else into our own dict so that the output
         # side can grab it directly as before.  As we move things into more
         # structured data, this will, hopefully, fade away.
         #
-        self.other_stuff = other_stuff
+        known_keys = {
+            'declaration_start_line',
+            'sections',
+            'sections_start_lines',
+            'parameterlist',
+            'parameterdesc_start_lines',
+            'parameterdescs',
+            'parametertypes',
+            'warnings',
+        }
+
+        self.other_stuff = {}
+        for k, v in other_stuff.items():
+            if k in known_keys:
+                setattr(self, k, v)           # real attribute
+            else:
+                self.other_stuff[k] = v
 
     def get(self, key, default = None):
         """
@@ -41,6 +60,20 @@ class KdocItem:
     def __getitem__(self, key):
         return self.get(key)
 
+    @classmethod
+    def from_dict(cls, d):
+        """Create a KdocItem from a plain dict."""
+
+        cp = d.copy()
+        name        = cp.pop('name', None)
+        fname       = cp.pop('fname', None)
+        type        = cp.pop('type', None)
+        start_line  = cp.pop('start_line', 1)
+        other_stuff = cp.pop('other_stuff', {})
+
+        # Everything that’s left goes straight to __init__
+        return cls(name, fname, type, start_line, **cp, **other_stuff)
+
     #
     # Tracking of section and parameter information.
     #
-- 
cgit v1.2.3


From e394855fcc897f73f23c364a3a596b54cc879e4c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:08 +0100
Subject: docs: kdoc_item: fix a typo on sections_start_lines

Currently, there are 15 occurrences of section?_start_lines,
with 10 of them using the plural form.

This is an issue because, while kdoc_output works with KdocItem,
the name it uses doesn't match the one initialized by KdocItem.

The variable sections_start_lines stores multiple sections,
so the plural form is the correct one.

So, ensure that, in all parts of kdoc, it is referred to
as sections_start_lines.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <d1e0f1d3f80df41c11a1bbde6a12fd9468bc3813.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_item.py   | 2 +-
 tools/lib/python/kdoc/kdoc_output.py | 2 +-
 tools/lib/python/kdoc/kdoc_parser.py | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
index 5f41790efacb..fe08cac861c2 100644
--- a/tools/lib/python/kdoc/kdoc_item.py
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -82,7 +82,7 @@ class KdocItem:
         Set sections and start lines.
         """
         self.sections = sections
-        self.section_start_lines = start_lines
+        self.sections_start_lines = start_lines
 
     def set_params(self, names, descs, types, starts):
         """
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 73d71cbeabb5..1b54117dbe19 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -389,7 +389,7 @@ class RestFormat(OutputFormat):
             else:
                 self.data += f'{self.lineprefix}**{section}**\n\n'
 
-            self.print_lineno(args.section_start_lines.get(section, 0))
+            self.print_lineno(args.sections_start_lines.get(section, 0))
             self.output_highlight(text)
             self.data += "\n"
         self.data += "\n"
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index f6c4ee3b18c9..35658a7e72d5 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -140,7 +140,7 @@ class KernelEntry:
         self.parametertypes = {}
         self.parameterdesc_start_lines = {}
 
-        self.section_start_lines = {}
+        self.sections_start_lines = {}
         self.sections = {}
 
         self.anon_struct_union = False
@@ -220,7 +220,7 @@ class KernelEntry:
                 self.sections[name] += '\n' + contents
             else:
                 self.sections[name] = contents
-                self.section_start_lines[name] = self.new_start_line
+                self.sections_start_lines[name] = self.new_start_line
                 self.new_start_line = 0
 
 #        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
@@ -316,7 +316,7 @@ class KernelDoc:
         for section in ["Description", "Return"]:
             if section in sections and not sections[section].rstrip():
                 del sections[section]
-        item.set_sections(sections, self.entry.section_start_lines)
+        item.set_sections(sections, self.entry.sections_start_lines)
         item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                         self.entry.parametertypes,
                         self.entry.parameterdesc_start_lines)
-- 
cgit v1.2.3


From b37b3cbbb1f1a99bc8b95d9f00fcf887c27f4770 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:13 +0100
Subject: docs: add a new file to write kernel-doc output to a YAML file

Storing kernel-doc output is helpful for debugging problems
with it and for preparing unit tests.

Add a class to store such contents in the same format as defined
in kdoc-test-schema.yaml.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <5d084ca1a91f6a620534a1135d1b8183d934319a.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_yaml_file.py | 155 ++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 tools/lib/python/kdoc/kdoc_yaml_file.py

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py
new file mode 100644
index 000000000000..db131503c3f6
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_yaml_file.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+import os
+
+from kdoc.kdoc_output import ManFormat, RestFormat
+
+
+class KDocTestFile():
+    """
+    Handles the logic needed to store kernel‑doc output inside a YAML file.
+     Useful for unit tests and regression tests.
+    """
+
+    def __init__(self, config, yaml_file, yaml_content):
+        #
+        # Bail out early if yaml is not available
+        #
+        try:
+            import yaml
+        except ImportError:
+            sys.exit("Warning: yaml package not available. Aborting it.")
+
+        self.config = config
+        self.test_file = os.path.expanduser(yaml_file)
+        self.yaml_content = yaml_content
+
+        self.tests = []
+
+        out_dir = os.path.dirname(self.test_file)
+        if out_dir and not os.path.isdir(out_dir):
+            sys.exit(f"Directory {out_dir} doesn't exist.")
+
+        self.out_style = []
+
+        if "man" in self.yaml_content:
+            out_style = ManFormat()
+            out_style.set_config(self.config)
+
+            self.out_style.append(out_style)
+
+        if "rst" in self.yaml_content:
+            out_style = RestFormat()
+            out_style.set_config(self.config)
+
+            self.out_style.append(out_style)
+
+    def set_filter(self, export, internal, symbol, nosymbol,
+                   function_table, enable_lineno, no_doc_sections):
+        """
+        Set filters at the output classes.
+        """
+        for out_style in self.out_style:
+            out_style.set_filter(export, internal, symbol,
+                                 nosymbol, function_table,
+                                 enable_lineno, no_doc_sections)
+
+    @staticmethod
+    def get_kdoc_item(arg, start_line=1):
+
+        d = vars(arg)
+
+        declaration_start_line = d.get("declaration_start_line")
+        if not declaration_start_line:
+            return d
+
+        d["declaration_start_line"] = start_line
+
+        parameterdesc_start_lines = d.get("parameterdesc_start_lines")
+        if parameterdesc_start_lines:
+            for key in parameterdesc_start_lines:
+                ln = parameterdesc_start_lines[key]
+                ln += start_line - declaration_start_line
+
+                parameterdesc_start_lines[key] = ln
+
+        sections_start_lines = d.get("sections_start_lines")
+        if sections_start_lines:
+            for key in sections_start_lines:
+                ln = sections_start_lines[key]
+                ln += start_line - declaration_start_line
+
+                sections_start_lines[key] = ln
+
+        return d
+
+    def output_symbols(self, fname, symbols, source):
+        """
+        Store source, symbols and output strings at self.tests.
+        """
+
+        #
+        # KdocItem needs to be converted into dicts
+        #
+        kdoc_item = []
+        expected = []
+
+        if not symbols and not source:
+            return
+
+        if not source or len(symbols) != len(source):
+            print(f"Warning: lengths are different. Ignoring {fname}")
+
+            # Folding without line numbers is too hard.
+            # The right thing to do here to proceed would be to delete
+            # not-handled source blocks, as len(source) should be bigger
+            # than len(symbols)
+            return
+
+        base_name = "test_" + fname.replace(".", "_").replace("/", "_")
+        expected_dict = {}
+        start_line=1
+
+        for i in range(0, len(symbols)):
+            arg = symbols[i]
+
+            if "KdocItem" in self.yaml_content:
+                msg = self.get_kdoc_item(arg)
+
+                expected_dict["kdoc_item"] = msg
+
+            for out_style in self.out_style:
+                if isinstance(out_style, ManFormat):
+                    key = "man"
+                else:
+                    key = "rst"
+
+                expected_dict[key]= out_style.output_symbols(fname, [arg])
+
+            name = f"{base_name}_{i:03d}"
+
+            test = {
+                "name": name,
+                "description": f"{fname} line {source[i]["ln"]}",
+                "fname": fname,
+                "source": source[i]["data"],
+                "expected": [expected_dict]
+            }
+
+            self.tests.append(test)
+
+            expected_dict = {}
+
+    def write(self):
+        """
+        Output the content of self.tests to self.test_file.
+        """
+        import yaml
+
+        data = {"tests": self.tests}
+
+        with open(self.test_file, "w", encoding="utf-8") as fp:
+            yaml.safe_dump(data, fp, sort_keys=False, default_style="|",
+                           default_flow_style=False, allow_unicode=True)
-- 
cgit v1.2.3


From 01d6d7bf9672f1aeabbffaa3fbfb8017223ff878 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 18 Mar 2026 10:11:14 +0100
Subject: docs: kernel-doc: add support to store output on a YAML file

Add a command line parameter and library support to optionally
store:
- KdocItem intermediate format after parsing;
- man pages output;
- rst output.

inside a YAML file.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <ba54277b3c909867153b9547dfa33c1831ca35d9.1773823995.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py  | 47 +++++++++++++++++++++++++++++++-----
 tools/lib/python/kdoc/kdoc_parser.py | 27 ++++++++++++++++++++-
 2 files changed, 67 insertions(+), 7 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 58f4ee08e226..5a299ed44d62 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -16,6 +16,7 @@ import re
 from kdoc.kdoc_parser import KernelDoc
 from kdoc.xforms_lists import CTransforms
 from kdoc.kdoc_output import OutputFormat
+from kdoc.kdoc_yaml_file import KDocTestFile
 
 
 class GlobSourceFiles:
@@ -152,6 +153,12 @@ class KernelFiles():
 
         If not specified, defaults to use: ``logging.getLogger("kernel-doc")``
 
+    ``yaml_file``
+        If defined, stores the output inside a YAML file.
+
+    ``yaml_content``
+        Defines what will be inside the YAML file.
+
     Note:
         There are two type of parsers defined here:
 
@@ -181,7 +188,12 @@ class KernelFiles():
         if fname in self.files:
             return
 
-        doc = KernelDoc(self.config, fname, self.xforms)
+        if self.test_file:
+            store_src = True
+        else:
+            store_src = False
+
+        doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src)
         export_table, entries = doc.parse_kdoc()
 
         self.export_table[fname] = export_table
@@ -191,6 +203,10 @@ class KernelFiles():
 
         self.results[fname] = entries
 
+        source = doc.get_source()
+        if source:
+            self.source[fname] = source
+
     def process_export_file(self, fname):
         """
         Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
@@ -220,7 +236,7 @@ class KernelFiles():
     def __init__(self, verbose=False, out_style=None, xforms=None,
                  werror=False, wreturn=False, wshort_desc=False,
                  wcontents_before_sections=False,
-                 logger=None):
+                 yaml_file=None, yaml_content=None, logger=None):
         """
         Initialize startup variables and parse all files.
         """
@@ -259,6 +275,11 @@ class KernelFiles():
         # Override log warning, as we want to count errors
         self.config.warning = self.warning
 
+        if yaml_file:
+            self.test_file = KDocTestFile(self.config, yaml_file, yaml_content)
+        else:
+            self.test_file = None
+
         if xforms:
             self.xforms = xforms
         else:
@@ -273,6 +294,7 @@ class KernelFiles():
 
         self.errors = 0
         self.results = {}
+        self.source = {}
 
         self.files = set()
         self.export_files = set()
@@ -331,16 +353,29 @@ class KernelFiles():
                 for s in symbol:
                     function_table.add(s)
 
-            self.out_style.set_filter(export, internal, symbol, nosymbol,
-                                      function_table, enable_lineno,
-                                      no_doc_sections)
-
             if fname not in self.results:
                 self.config.log.warning("No kernel-doc for file %s", fname)
                 continue
 
             symbols = self.results[fname]
 
+            if self.test_file:
+                self.test_file.set_filter(export, internal, symbol, nosymbol,
+                                          function_table, enable_lineno,
+                                          no_doc_sections)
+
+                self.test_file.output_symbols(fname, symbols,
+                                              self.source.get(fname))
+
+                continue
+
+            self.out_style.set_filter(export, internal, symbol, nosymbol,
+                                      function_table, enable_lineno,
+                                      no_doc_sections)
+
             msg = self.out_style.output_symbols(fname, symbols)
             if msg:
                 yield fname, msg
+
+        if self.test_file:
+            self.test_file.write()
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 35658a7e72d5..a10e64589d76 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -246,12 +246,13 @@ class KernelDoc:
     #: String to write when a parameter is not described.
     undescribed = "-- undescribed --"
 
-    def __init__(self, config, fname, xforms):
+    def __init__(self, config, fname, xforms, store_src=False):
         """Initialize internal variables"""
 
         self.fname = fname
         self.config = config
         self.xforms = xforms
+        self.store_src = store_src
 
         tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
 
@@ -264,6 +265,9 @@ class KernelDoc:
         # Place all potential outputs into an array
         self.entries = []
 
+        # When store_src is true, the kernel-doc source content is stored here
+        self.source = None
+
         #
         # We need Python 3.7 for its "dicts remember the insertion
         # order" guarantee
@@ -1592,6 +1596,15 @@ class KernelDoc:
         state.DOCBLOCK:			process_docblock,
         }
 
+    def get_source(self):
+        """
+        Return the file content of the lines handled by kernel-doc at the
+        latest parse_kdoc() run.
+
+        Returns none if KernelDoc() was not initialized with store_src,
+        """
+        return self.source
+
     def parse_kdoc(self):
         """
         Open and process each line of a C source file.
@@ -1605,6 +1618,8 @@ class KernelDoc:
         prev = ""
         prev_ln = None
         export_table = set()
+        self.source = []
+        self.state = state.NORMAL
 
         try:
             with open(self.fname, "r", encoding="utf8",
@@ -1631,6 +1646,8 @@ class KernelDoc:
                                           ln, state.name[self.state],
                                           line)
 
+                    prev_state = self.state
+
                     # This is an optimization over the original script.
                     # There, when export_file was used for the same file,
                     # it was read twice. Here, we use the already-existing
@@ -1641,6 +1658,14 @@ class KernelDoc:
                         # Hand this line to the appropriate state handler
                         self.state_actions[self.state](self, ln, line)
 
+                    if self.store_src and prev_state != self.state or self.state != state.NORMAL:
+                        if self.state == state.NAME:
+                            # A "/**" was detected. Add a new source element
+                            self.source.append({"ln": ln, "data": line + "\n"})
+                        else:
+                            # Append to the existing one
+                            self.source[-1]["data"] += line + "\n"
+
             self.emit_unused_warnings()
 
         except OSError:
-- 
cgit v1.2.3


From 6e0d7b63676b85490bbaf01c9a8ebcd692bed981 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 23 Mar 2026 10:10:47 +0100
Subject: docs: kdoc_yaml_file: add a representer to make strings look nicer

The string representation is currently not OK. Add a helper
function to improve it, and drop blank lines at the beginning and
at the end of the dumps.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <422041a8b49b2609de5749092fe074b7948c32a6.1774256269.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_yaml_file.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py
index db131503c3f6..18737abb1176 100644
--- a/tools/lib/python/kdoc/kdoc_yaml_file.py
+++ b/tools/lib/python/kdoc/kdoc_yaml_file.py
@@ -126,7 +126,7 @@ class KDocTestFile():
                 else:
                     key = "rst"
 
-                expected_dict[key]= out_style.output_symbols(fname, [arg])
+                expected_dict[key]= out_style.output_symbols(fname, [arg]).strip()
 
             name = f"{base_name}_{i:03d}"
 
@@ -148,8 +148,20 @@ class KDocTestFile():
         """
         import yaml
 
+        # Helper function to better handle multilines
+        def str_presenter(dumper, data):
+            if "\n" in data:
+                return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+
+            return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+        # Register the representer
+        yaml.add_representer(str, str_presenter)
+
         data = {"tests": self.tests}
 
         with open(self.test_file, "w", encoding="utf-8") as fp:
-            yaml.safe_dump(data, fp, sort_keys=False, default_style="|",
-                           default_flow_style=False, allow_unicode=True)
+            yaml.dump(data, fp,
+                      sort_keys=False, width=120, indent=2,
+                      default_flow_style=False, allow_unicode=True,
+                      explicit_start=False, explicit_end=False)
-- 
cgit v1.2.3


From 8326e4a21838593fe67b5d79ba6d0dc8e962ebb9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 23 Mar 2026 10:10:49 +0100
Subject: docs: kdoc_output: fix handling of simple tables

Fix check for simple table delimiters.

ReST simple tables use "=" instead of "-". I ended up testing it with
a table modified from a complex one, using "--- --- ---", instead
of searching for a real Kernel example.

Only noticed when adding a unit test and looking for an actual
example from kernel-doc markups.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <dea95337c05040f95e5a95ae41d69ddef0aaa8d6.1774256269.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 1b54117dbe19..2bfcd356654b 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -846,14 +846,14 @@ class ManFormat(OutputFormat):
         colspec_row = None
 
         pos = []
-        for m in KernRe(r'\-+').finditer(lines[i]):
+        for m in KernRe(r'\=+').finditer(lines[i]):
             pos.append((m.start(), m.end() - 1))
 
         i += 1
         while i < len(lines):
             line = lines[i]
 
-            if KernRe(r"^\s*[\-]+[ \t\-]+$").match(line):
+            if KernRe(r"^\s*[\=]+[ \t\=]+$").match(line):
                 i += 1
                 break
 
@@ -969,7 +969,7 @@ class ManFormat(OutputFormat):
                     self.data += text
                     continue
 
-                if KernRe(r"^\-+[ \t]\-[ \t\-]+$").match(line):
+                if KernRe(r"^\=+[ \t]\=[ \t\=]+$").match(line):
                     i, text = self.simple_table(lines, i)
                     self.data += text
                     continue
-- 
cgit v1.2.3


From 99ec67a9984fdf38c7ed78695aeb1b99cfee5b50 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 23 Mar 2026 10:10:50 +0100
Subject: docs: kdoc: better handle source when producing YAML output

The current logic was storing the symbols' source code in a list,
not linked to the actual KdocItem. While this works fine when
kernel-doc markups are OK, in places where there is a "/**"
without a valid kernel-doc markup, the 1:1 match between source
code and KdocItem no longer holds, causing problems when
generating the YAML output.

Fix it by storing the source code directly into the KdocItem
structure.

This shouldn't affect performance or memory footprint, except
when --yaml option is used.

While here, add a __repr__() function for KdocItem, as it
helps debugging it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <77902dafabb5c3250486aa2dc1568d5fafa95c5b.1774256269.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py     |   8 +--
 tools/lib/python/kdoc/kdoc_item.py      |   6 +-
 tools/lib/python/kdoc/kdoc_parser.py    | 100 ++++++++++++++++----------------
 tools/lib/python/kdoc/kdoc_yaml_file.py |  28 ++++-----
 4 files changed, 70 insertions(+), 72 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 5a299ed44d62..2428cfc4e843 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -203,10 +203,6 @@ class KernelFiles():
 
         self.results[fname] = entries
 
-        source = doc.get_source()
-        if source:
-            self.source[fname] = source
-
     def process_export_file(self, fname):
         """
         Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
@@ -294,7 +290,6 @@ class KernelFiles():
 
         self.errors = 0
         self.results = {}
-        self.source = {}
 
         self.files = set()
         self.export_files = set()
@@ -364,8 +359,7 @@ class KernelFiles():
                                           function_table, enable_lineno,
                                           no_doc_sections)
 
-                self.test_file.output_symbols(fname, symbols,
-                                              self.source.get(fname))
+                self.test_file.output_symbols(fname, symbols)
 
                 continue
 
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
index fe08cac861c2..a7aa6e1e4c1c 100644
--- a/tools/lib/python/kdoc/kdoc_item.py
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -14,7 +14,8 @@ class KdocItem:
     then pass into the output modules.
     """
 
-    def __init__(self, name, fname, type, start_line, **other_stuff):
+    def __init__(self, name, fname, type, start_line,
+                 **other_stuff):
         self.name = name
         self.fname = fname
         self.type = type
@@ -60,6 +61,9 @@ class KdocItem:
     def __getitem__(self, key):
         return self.get(key)
 
+    def __repr__(self):
+        return f"KdocItem({self.name}, {self.fname}, {self.type}, {self.declaration_start_line})"
+
     @classmethod
     def from_dict(cls, d):
         """Create a KdocItem from a plain dict."""
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index a10e64589d76..74af7ae47aa4 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -265,9 +265,6 @@ class KernelDoc:
         # Place all potential outputs into an array
         self.entries = []
 
-        # When store_src is true, the kernel-doc source content is stored here
-        self.source = None
-
         #
         # We need Python 3.7 for its "dicts remember the insertion
         # order" guarantee
@@ -720,13 +717,14 @@ class KernelDoc:
         return declaration
 
 
-    def dump_struct(self, ln, proto):
+    def dump_struct(self, ln, proto, source):
         """
         Store an entry for a ``struct`` or ``union``
         """
         #
         # Do the basic parse to get the pieces of the declaration.
         #
+        source = source
         proto = trim_private_members(proto)
         struct_parts = self.split_struct_proto(proto)
         if not struct_parts:
@@ -756,10 +754,11 @@ class KernelDoc:
                                    declaration_name)
         self.check_sections(ln, declaration_name, decl_type)
         self.output_declaration(decl_type, declaration_name,
+                                source=source,
                                 definition=self.format_struct_decl(declaration),
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_enum(self, ln, proto):
+    def dump_enum(self, ln, proto, source):
         """
         Store an ``enum`` inside self.entries array.
         """
@@ -767,6 +766,7 @@ class KernelDoc:
         # Strip preprocessor directives.  Note that this depends on the
         # trailing semicolon we added in process_proto_type().
         #
+        source = source
         proto = trim_private_members(proto)
         proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
         #
@@ -831,9 +831,10 @@ class KernelDoc:
                               f"Excess enum value '@{k}' description in '{declaration_name}'")
 
         self.output_declaration('enum', declaration_name,
+                                source=source,
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_var(self, ln, proto):
+    def dump_var(self, ln, proto, source):
         """
         Store variables that are part of kAPI.
         """
@@ -846,6 +847,7 @@ class KernelDoc:
         #
         # Store the full prototype before modifying it
         #
+        source = source
         full_proto = proto
         declaration_name = None
 
@@ -895,32 +897,34 @@ class KernelDoc:
             default_val = default_val.lstrip("=").strip()
 
         self.output_declaration("var", declaration_name,
+                                source=source,
                                 full_proto=full_proto,
                                 default_val=default_val,
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_declaration(self, ln, prototype):
+    def dump_declaration(self, ln, prototype, source):
         """
         Store a data declaration inside self.entries array.
         """
 
         if self.entry.decl_type == "enum":
-            self.dump_enum(ln, prototype)
+            self.dump_enum(ln, prototype, source)
         elif self.entry.decl_type == "typedef":
-            self.dump_typedef(ln, prototype)
+            self.dump_typedef(ln, prototype, source)
         elif self.entry.decl_type in ["union", "struct"]:
-            self.dump_struct(ln, prototype)
+            self.dump_struct(ln, prototype, source)
         elif self.entry.decl_type == "var":
-            self.dump_var(ln, prototype)
+            self.dump_var(ln, prototype, source)
         else:
             # This would be a bug
             self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
 
-    def dump_function(self, ln, prototype):
+    def dump_function(self, ln, prototype, source):
         """
         Store a function or function macro inside self.entries array.
         """
 
+        source = source
         found = func_macro = False
         return_type = ''
         decl_type = 'function'
@@ -1013,13 +1017,14 @@ class KernelDoc:
         # Store the result.
         #
         self.output_declaration(decl_type, declaration_name,
+                                source=source,
                                 typedef=('typedef' in return_type),
                                 functiontype=return_type,
                                 purpose=self.entry.declaration_purpose,
                                 func_macro=func_macro)
 
 
-    def dump_typedef(self, ln, proto):
+    def dump_typedef(self, ln, proto, source):
         """
         Store a ``typedef`` inside self.entries array.
         """
@@ -1030,6 +1035,8 @@ class KernelDoc:
         typedef_ident = r'\*?\s*(\w\S+)\s*'
         typedef_args = r'\s*\((.*)\);'
 
+        source = source
+
         typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
         typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
 
@@ -1050,6 +1057,7 @@ class KernelDoc:
             self.create_parameter_list(ln, 'function', args, ',', declaration_name)
 
             self.output_declaration('function', declaration_name,
+                                    source=source,
                                     typedef=True,
                                     functiontype=return_type,
                                     purpose=self.entry.declaration_purpose)
@@ -1067,6 +1075,7 @@ class KernelDoc:
                 return
 
             self.output_declaration('typedef', declaration_name,
+                                    source=source,
                                     purpose=self.entry.declaration_purpose)
             return
 
@@ -1104,7 +1113,7 @@ class KernelDoc:
         function_set.add(symbol)
         return True
 
-    def process_normal(self, ln, line):
+    def process_normal(self, ln, line, source):
         """
         STATE_NORMAL: looking for the ``/**`` to begin everything.
         """
@@ -1118,7 +1127,7 @@ class KernelDoc:
         # next line is always the function name
         self.state = state.NAME
 
-    def process_name(self, ln, line):
+    def process_name(self, ln, line, source):
         """
         STATE_NAME: Looking for the "name - description" line
         """
@@ -1251,7 +1260,7 @@ class KernelDoc:
         return False
 
 
-    def process_decl(self, ln, line):
+    def process_decl(self, ln, line, source):
         """
         STATE_DECLARATION: We've seen the beginning of a declaration.
         """
@@ -1280,7 +1289,7 @@ class KernelDoc:
             self.emit_msg(ln, f"bad line: {line}")
 
 
-    def process_special(self, ln, line):
+    def process_special(self, ln, line, source):
         """
         STATE_SPECIAL_SECTION: a section ending with a blank line.
         """
@@ -1331,7 +1340,7 @@ class KernelDoc:
             # Unknown line, ignore
             self.emit_msg(ln, f"bad line: {line}")
 
-    def process_body(self, ln, line):
+    def process_body(self, ln, line, source):
         """
         STATE_BODY: the bulk of a kerneldoc comment.
         """
@@ -1345,7 +1354,7 @@ class KernelDoc:
             # Unknown line, ignore
             self.emit_msg(ln, f"bad line: {line}")
 
-    def process_inline_name(self, ln, line):
+    def process_inline_name(self, ln, line, source):
         """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
 
         if doc_inline_sect.search(line):
@@ -1363,10 +1372,10 @@ class KernelDoc:
             # Don't let it add partial comments at the code, as breaks the
             # logic meant to remove comments from prototypes.
             #
-            self.process_proto_type(ln, "/**\n" + line)
+            self.process_proto_type(ln, "/**\n" + line, source)
         # else ... ??
 
-    def process_inline_text(self, ln, line):
+    def process_inline_text(self, ln, line, source):
         """STATE_INLINE_TEXT: docbook comments within a prototype."""
 
         if doc_inline_end.search(line):
@@ -1452,7 +1461,7 @@ class KernelDoc:
 
         return proto
 
-    def process_proto_function(self, ln, line):
+    def process_proto_function(self, ln, line, source):
         """Ancillary routine to process a function prototype."""
 
         # strip C99-style comments to end of line
@@ -1494,10 +1503,10 @@ class KernelDoc:
             #
             # ... and we're done
             #
-            self.dump_function(ln, self.entry.prototype)
+            self.dump_function(ln, self.entry.prototype, source)
             self.reset_state(ln)
 
-    def process_proto_type(self, ln, line):
+    def process_proto_type(self, ln, line, source):
         """
         Ancillary routine to process a type.
         """
@@ -1527,7 +1536,7 @@ class KernelDoc:
                 elif chunk == '}':
                     self.entry.brcount -= 1
                 elif chunk == ';' and self.entry.brcount <= 0:
-                    self.dump_declaration(ln, self.entry.prototype)
+                    self.dump_declaration(ln, self.entry.prototype, source)
                     self.reset_state(ln)
                     return
         #
@@ -1536,7 +1545,7 @@ class KernelDoc:
         #
         self.entry.prototype += ' '
 
-    def process_proto(self, ln, line):
+    def process_proto(self, ln, line, source):
         """STATE_PROTO: reading a function/whatever prototype."""
 
         if doc_inline_oneline.search(line):
@@ -1548,17 +1557,18 @@ class KernelDoc:
             self.state = state.INLINE_NAME
 
         elif self.entry.decl_type == 'function':
-            self.process_proto_function(ln, line)
+            self.process_proto_function(ln, line, source)
 
         else:
-            self.process_proto_type(ln, line)
+            self.process_proto_type(ln, line, source)
 
-    def process_docblock(self, ln, line):
+    def process_docblock(self, ln, line, source):
         """STATE_DOCBLOCK: within a ``DOC:`` block."""
 
         if doc_end.search(line):
             self.dump_section()
-            self.output_declaration("doc", self.entry.identifier)
+            self.output_declaration("doc", self.entry.identifier,
+                                    source=source)
             self.reset_state(ln)
 
         elif doc_content.search(line):
@@ -1596,15 +1606,6 @@ class KernelDoc:
         state.DOCBLOCK:			process_docblock,
         }
 
-    def get_source(self):
-        """
-        Return the file content of the lines handled by kernel-doc at the
-        latest parse_kdoc() run.
-
-        Returns none if KernelDoc() was not initialized with store_src,
-        """
-        return self.source
-
     def parse_kdoc(self):
         """
         Open and process each line of a C source file.
@@ -1618,8 +1619,8 @@ class KernelDoc:
         prev = ""
         prev_ln = None
         export_table = set()
-        self.source = []
         self.state = state.NORMAL
+        source = ""
 
         try:
             with open(self.fname, "r", encoding="utf8",
@@ -1646,7 +1647,11 @@ class KernelDoc:
                                           ln, state.name[self.state],
                                           line)
 
-                    prev_state = self.state
+                    if self.store_src:
+                        if source and self.state == state.NORMAL:
+                            source = ""
+                        elif self.state != state.NORMAL:
+                            source += line + "\n"
 
                     # This is an optimization over the original script.
                     # There, when export_file was used for the same file,
@@ -1655,16 +1660,11 @@ class KernelDoc:
                     #
                     if (self.state != state.NORMAL) or \
                        not self.process_export(export_table, line):
+                        prev_state = self.state
                         # Hand this line to the appropriate state handler
-                        self.state_actions[self.state](self, ln, line)
-
-                    if self.store_src and prev_state != self.state or self.state != state.NORMAL:
-                        if self.state == state.NAME:
-                            # A "/**" was detected. Add a new source element
-                            self.source.append({"ln": ln, "data": line + "\n"})
-                        else:
-                            # Append to the existing one
-                            self.source[-1]["data"] += line + "\n"
+                        self.state_actions[self.state](self, ln, line, source)
+                        if prev_state == state.NORMAL and self.state != state.NORMAL:
+                            source += line + "\n"
 
             self.emit_unused_warnings()
 
diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py
index 18737abb1176..1e2ae7c59d70 100644
--- a/tools/lib/python/kdoc/kdoc_yaml_file.py
+++ b/tools/lib/python/kdoc/kdoc_yaml_file.py
@@ -85,7 +85,7 @@ class KDocTestFile():
 
         return d
 
-    def output_symbols(self, fname, symbols, source):
+    def output_symbols(self, fname, symbols):
         """
         Store source, symbols and output strings at self.tests.
         """
@@ -96,16 +96,10 @@ class KDocTestFile():
         kdoc_item = []
         expected = []
 
-        if not symbols and not source:
-            return
-
-        if not source or len(symbols) != len(source):
-            print(f"Warning: lengths are different. Ignoring {fname}")
-
-            # Folding without line numbers is too hard.
-            # The right thing to do here to proceed would be to delete
-            # not-handled source blocks, as len(source) should be bigger
-            # than len(symbols)
+        #
+        # Source code didn't produce any symbol
+        #
+        if not symbols:
             return
 
         base_name = "test_" + fname.replace(".", "_").replace("/", "_")
@@ -115,9 +109,15 @@ class KDocTestFile():
         for i in range(0, len(symbols)):
             arg = symbols[i]
 
-            if "KdocItem" in self.yaml_content:
+            source = arg.get("source", "")
+
+            if arg and "KdocItem" in self.yaml_content:
                 msg = self.get_kdoc_item(arg)
 
+                other_stuff = msg.get("other_stuff", {})
+                if "source" in other_stuff:
+                    del other_stuff["source"]
+
                 expected_dict["kdoc_item"] = msg
 
             for out_style in self.out_style:
@@ -132,9 +132,9 @@ class KDocTestFile():
 
             test = {
                 "name": name,
-                "description": f"{fname} line {source[i]["ln"]}",
+                "description": f"{fname} line {arg.declaration_start_line}",
                 "fname": fname,
-                "source": source[i]["data"],
+                "source": source,
                 "expected": [expected_dict]
             }
 
-- 
cgit v1.2.3


From e786fab2cfcc9ab65adf35d2eab4ca94abe1955f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 23 Mar 2026 10:10:51 +0100
Subject: docs: kdoc_yaml_file: use a better name for the tests

Instead of always using a name with a number on it, use
the name of the object directly whenever possible.

When the name is already used, append a numeric suffix at
the end.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <d1c4cd94547d843af0debf9e317e006d55d705f1.1774256269.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_yaml_file.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py
index 1e2ae7c59d70..0be020d50df0 100644
--- a/tools/lib/python/kdoc/kdoc_yaml_file.py
+++ b/tools/lib/python/kdoc/kdoc_yaml_file.py
@@ -25,6 +25,7 @@ class KDocTestFile():
         self.config = config
         self.test_file = os.path.expanduser(yaml_file)
         self.yaml_content = yaml_content
+        self.test_names = set()
 
         self.tests = []
 
@@ -102,13 +103,10 @@ class KDocTestFile():
         if not symbols:
             return
 
-        base_name = "test_" + fname.replace(".", "_").replace("/", "_")
         expected_dict = {}
         start_line=1
 
-        for i in range(0, len(symbols)):
-            arg = symbols[i]
-
+        for arg in symbols:
             source = arg.get("source", "")
 
             if arg and "KdocItem" in self.yaml_content:
@@ -120,6 +118,21 @@ class KDocTestFile():
 
                 expected_dict["kdoc_item"] = msg
 
+            base_name = arg.name
+            if not base_name:
+                base_name = fname
+            base_name = base_name.lower().replace(".", "_").replace("/", "_")
+
+
+            # Don't add duplicated names
+            i = 0
+            name = base_name
+            while name in self.test_names:
+                i += 1
+                name = f"{base_name}_{i:03d}"
+
+            self.test_names.add(name)
+
             for out_style in self.out_style:
                 if isinstance(out_style, ManFormat):
                     key = "man"
@@ -128,8 +141,6 @@ class KDocTestFile():
 
                 expected_dict[key]= out_style.output_symbols(fname, [arg]).strip()
 
-            name = f"{base_name}_{i:03d}"
-
             test = {
                 "name": name,
                 "description": f"{fname} line {arg.declaration_start_line}",
-- 
cgit v1.2.3


From 9c3911812b4a719623ea7502b419929eb01b2fc2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 23 Mar 2026 10:10:52 +0100
Subject: docs: kdoc_output: raise an error if full_proto not available for var

The full_proto entry is mandatory for variables but, if it is
missing, we need to know which symbol had the problem.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <0c3d9dd25889784b999efdb354ade48264c0e03c.1774256269.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_output.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 2bfcd356654b..de107ab4a281 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -513,7 +513,9 @@ class RestFormat(OutputFormat):
     def out_var(self, fname, name, args):
         oldprefix = self.lineprefix
         ln = args.declaration_start_line
-        full_proto = args.other_stuff["full_proto"]
+        full_proto = args.other_stuff.get("full_proto")
+        if not full_proto:
+            raise KeyError(f"Can't find full proto for {name} variable")
 
         self.lineprefix = "  "
 
-- 
cgit v1.2.3


From 2ca0b54dca438edb0f6b0eec7913d3cab60ddebf Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 23 Mar 2026 10:10:53 +0100
Subject: docs: c_lex.py: store logger on its data

By having the logger stored there, any code using CTokenizer can
log messages there.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <467979dc18149e4b2a7113c178e0cb07919632f2.1774256269.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/c_lex.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index e01b154f458e..cb95f5172448 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -177,7 +177,7 @@ class CTokenizer():
     # This class is inspired and follows the basic concepts of:
     #   https://docs.python.org/3/library/re.html#writing-a-tokenizer
 
-    def __init__(self, source=None, log=None):
+    def __init__(self, source=None):
         """
         Create a regular expression to handle RE_SCANNER_LIST.
 
@@ -188,6 +188,12 @@ class CTokenizer():
         when matching a code via RE_SCANNER.
         """
 
+        #
+        # Store logger to allow parser classes to re-use it
+        #
+        global log
+        self.log = log
+
         self.tokens = []
 
         if not source:
-- 
cgit v1.2.3


From d642acfd597e3ec37138f9a8f5a634845e3612fd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 27 Mar 2026 06:57:48 +0100
Subject: doc tools: better handle KBUILD_VERBOSE

As reported by Jacob, there are problems when KBUILD_VERBOSE is
set in the environment.

Fix it in both kernel-doc and sphinx-build-wrapper.

Reported-by: Jacob Keller <jacob.e.keller@intel.com>
Closes: https://lore.kernel.org/linux-doc/9367d899-53af-4d9c-9320-22fc4dbadca5@intel.com/
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Tested-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <7a99788db75630fb14828d612c0fd77c45ec1891.1774591065.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/kdoc/kdoc_files.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 2428cfc4e843..ed82b6e6ab25 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -238,7 +238,12 @@ class KernelFiles():
         """
 
         if not verbose:
-            verbose = bool(os.environ.get("KBUILD_VERBOSE", 0))
+            try:
+                verbose = bool(int(os.environ.get("KBUILD_VERBOSE", 0)))
+            except ValueError:
+                # Handles an eventual case where verbosity is not a number
+                # like KBUILD_VERBOSE=""
+                verbose = False
 
         if out_style is None:
             out_style = OutputFormat()
-- 
cgit v1.2.3


From 07f6cb18c5dd627023e0810cfd51203392f55990 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 26 Mar 2026 20:09:42 +0100
Subject: tools: unittest_helper: add a quiet mode

In quiet mode, only report errors.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <27556792ff70e6267ecd19c258149d380db8d423.1774551940.git.mchehab+huawei@kernel.org>
---
 tools/lib/python/unittest_helper.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'tools/lib/python')

diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittest_helper.py
index 55d444cd73d4..f3cba5120401 100755
--- a/tools/lib/python/unittest_helper.py
+++ b/tools/lib/python/unittest_helper.py
@@ -141,7 +141,7 @@ class Summary(unittest.TestResult):
         super().addSkip(test, reason)
         self._record_test(test, f"SKIP ({reason})")
 
-    def printResults(self):
+    def printResults(self, verbose):
         """
         Print results using colors if tty.
         """
@@ -174,10 +174,15 @@ class Summary(unittest.TestResult):
 
         # Print results
         for module_name, classes in self.test_results.items():
-            print(f"{module_name}:")
+            if verbose:
+                print(f"{module_name}:")
             for class_name, tests in classes.items():
-                print(f"    {class_name}:")
+                if verbose:
+                    print(f"    {class_name}:")
                 for test_name, status in tests:
+                    if not verbose and status in [ "OK", "EXPECTED_FAIL" ]:
+                        continue
+
                     # Get base status without reason for SKIP
                     if status.startswith("SKIP"):
                         status_code = status.split()[0]
@@ -187,7 +192,8 @@ class Summary(unittest.TestResult):
                     print(
                         f"        {test_name + ':':<{max_length}}{color}{status}{COLORS['reset']}"
                     )
-            print()
+            if verbose:
+                print()
 
         # Print summary
         print(f"\nRan {self.testsRun} tests", end="")
@@ -230,6 +236,7 @@ class TestUnits:
         """Returns a parser for command line arguments."""
         parser = argparse.ArgumentParser(description="Test runner with regex filtering")
         parser.add_argument("-v", "--verbose", action="count", default=1)
+        parser.add_argument("-q", "--quiet", action="store_true")
         parser.add_argument("-f", "--failfast", action="store_true")
         parser.add_argument("-k", "--keyword",
                             help="Regex pattern to filter test methods")
@@ -279,7 +286,10 @@ class TestUnits:
         if not caller_file and not suite:
             raise TypeError("Either caller_file or suite is needed at TestUnits")
 
-        verbose = args.verbose
+        if args.quiet:
+            verbose = 0
+        else:
+            verbose = args.verbose
 
         if not env:
             env = os.environ.copy()
@@ -334,7 +344,7 @@ class TestUnits:
                                             failfast=args.failfast)
         result = runner.run(suite)
         if resultclass:
-            result.printResults()
+            result.printResults(verbose)
 
         sys.exit(not result.wasSuccessful())
 
-- 
cgit v1.2.3