summaryrefslogtreecommitdiff
path: root/tools/lib/python
diff options
context:
space:
mode:
authorJonathan Corbet <corbet@lwn.net>2026-03-22 15:25:08 -0600
committerJonathan Corbet <corbet@lwn.net>2026-03-22 15:25:08 -0600
commit0a1a27776ddf0072883cdb4a61b91155553fcb96 (patch)
tree278b2bec91d7c05f9c3132da36256b1d433b35ab /tools/lib/python
parent781171bec0650c00c642564afcb5cce57abda5bf (diff)
parent01d6d7bf9672f1aeabbffaa3fbfb8017223ff878 (diff)
Merge branch 'mauro' into docs-mw
This series comes after: https://lore.kernel.org/linux-doc/cover.1773770483.git.mchehab+huawei@kernel.org/ It basically contains patches that I submitted before as part of a 40+ patch series, but that were less relevant, plus a couple of other minor fixes: - patch 1 improves one of the CTokenizer unit tests, fixing some potential issues in it; - patches 2 and 3 contain some improvements/fixes for the Sphinx Python autodoc extension. They basically document c_lex.py; - The remaining patches: - create a new class for kernel-doc config; - fix some internal representations of KdocItem; - add unit tests for the KernelDoc() parser class; - add support to output KdocItem in YAML, which is a machine-readable output for all documented kAPI. None of the patches should affect man or html output.
Diffstat (limited to 'tools/lib/python')
-rw-r--r--tools/lib/python/kdoc/c_lex.py9
-rw-r--r--tools/lib/python/kdoc/kdoc_files.py104
-rw-r--r--tools/lib/python/kdoc/kdoc_item.py39
-rw-r--r--tools/lib/python/kdoc/kdoc_output.py23
-rw-r--r--tools/lib/python/kdoc/kdoc_parser.py33
-rw-r--r--tools/lib/python/kdoc/kdoc_re.py9
-rw-r--r--tools/lib/python/kdoc/kdoc_yaml_file.py155
7 files changed, 325 insertions, 47 deletions
diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
index b6d58bd470a9b..e01b154f458ec 100644
--- a/tools/lib/python/kdoc/c_lex.py
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -336,13 +336,14 @@ class CTokenArgs:
self.sub_tokeninzer = CTokenizer(sub_str)
def groups(self, new_tokenizer):
- """
+ r"""
Create replacement arguments for backrefs like:
- ``\0``, ``\1``, ``\2``, ...``\n``
+ ``\0``, ``\1``, ``\2``, ... ``\{number}``
- It also accepts a ``+`` character to the highest backref. When used,
- it means in practice to ignore delimins after it, being greedy.
+ It also accepts a ``+`` character to the highest backref, like
+ ``\4+``. When used, the backref will be greedy, picking all other
+ arguments afterwards.
The logic is smart enough to only go up to the maximum required
argument, even if there are more.
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 8c20596239495..5a299ed44d621 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -9,7 +9,6 @@ Classes for navigating through the files that kernel-doc needs to handle
to generate documentation.
"""
-import argparse
import logging
import os
import re
@@ -17,6 +16,7 @@ import re
from kdoc.kdoc_parser import KernelDoc
from kdoc.xforms_lists import CTransforms
from kdoc.kdoc_output import OutputFormat
+from kdoc.kdoc_yaml_file import KDocTestFile
class GlobSourceFiles:
@@ -87,6 +87,28 @@ class GlobSourceFiles:
file_not_found_cb(fname)
+class KdocConfig():
+    """
+    Container for the configuration attributes shared by kdoc_parser and
+    kdoc_output.
+
+    Besides the verbosity and warning flags received by the constructor,
+    it carries the logger to report through (``log``) and the callable
+    used to emit warnings (``warning``), which starts as ``log.warning``.
+    """
+    def __init__(self, verbose=False, werror=False, wreturn=False,
+                 wshort_desc=False, wcontents_before_sections=False,
+                 logger=None):
+
+        # Without a caller-provided logger, use one named after this file
+        self.log = logger if logger else logging.getLogger(__file__)
+        self.warning = self.log.warning
+
+        self.verbose = verbose
+        self.werror = werror
+        self.wreturn = wreturn
+        self.wshort_desc = wshort_desc
+        self.wcontents_before_sections = wcontents_before_sections
+
class KernelFiles():
"""
Parse kernel-doc tags on multiple kernel source files.
@@ -131,6 +153,12 @@ class KernelFiles():
If not specified, defaults to use: ``logging.getLogger("kernel-doc")``
+ ``yaml_file``
+ If defined, stores the output inside a YAML file.
+
+ ``yaml_content``
+ Defines what will be inside the YAML file.
+
Note:
There are two type of parsers defined here:
@@ -160,7 +188,12 @@ class KernelFiles():
if fname in self.files:
return
- doc = KernelDoc(self.config, fname, self.xforms)
+ if self.test_file:
+ store_src = True
+ else:
+ store_src = False
+
+ doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src)
export_table, entries = doc.parse_kdoc()
self.export_table[fname] = export_table
@@ -170,6 +203,10 @@ class KernelFiles():
self.results[fname] = entries
+ source = doc.get_source()
+ if source:
+ self.source[fname] = source
+
def process_export_file(self, fname):
"""
Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file.
@@ -199,7 +236,7 @@ class KernelFiles():
def __init__(self, verbose=False, out_style=None, xforms=None,
werror=False, wreturn=False, wshort_desc=False,
wcontents_before_sections=False,
- logger=None):
+ yaml_file=None, yaml_content=None, logger=None):
"""
Initialize startup variables and parse all files.
"""
@@ -224,37 +261,40 @@ class KernelFiles():
if kdoc_werror:
werror = kdoc_werror
+ if not logger:
+ logger = logging.getLogger("kernel-doc")
+ else:
+ logger = logger
+
# Some variables are global to the parser logic as a whole as they are
# used to send control configuration to KernelDoc class. As such,
# those variables are read-only inside the KernelDoc.
- self.config = argparse.Namespace
+ self.config = KdocConfig(verbose, werror, wreturn, wshort_desc,
+ wcontents_before_sections, logger)
+
+ # Override log warning, as we want to count errors
+ self.config.warning = self.warning
- self.config.verbose = verbose
- self.config.werror = werror
- self.config.wreturn = wreturn
- self.config.wshort_desc = wshort_desc
- self.config.wcontents_before_sections = wcontents_before_sections
+ if yaml_file:
+ self.test_file = KDocTestFile(self.config, yaml_file, yaml_content)
+ else:
+ self.test_file = None
if xforms:
self.xforms = xforms
else:
self.xforms = CTransforms()
- if not logger:
- self.config.log = logging.getLogger("kernel-doc")
- else:
- self.config.log = logger
-
- self.config.warning = self.warning
-
self.config.src_tree = os.environ.get("SRCTREE", None)
# Initialize variables that are internal to KernelFiles
self.out_style = out_style
+ self.out_style.set_config(self.config)
self.errors = 0
self.results = {}
+ self.source = {}
self.files = set()
self.export_files = set()
@@ -294,8 +334,6 @@ class KernelFiles():
returning kernel-doc markups on each interaction.
"""
- self.out_style.set_config(self.config)
-
if not filenames:
filenames = sorted(self.results.keys())
@@ -315,29 +353,29 @@ class KernelFiles():
for s in symbol:
function_table.add(s)
- self.out_style.set_filter(export, internal, symbol, nosymbol,
- function_table, enable_lineno,
- no_doc_sections)
-
- msg = ""
if fname not in self.results:
self.config.log.warning("No kernel-doc for file %s", fname)
continue
symbols = self.results[fname]
- self.out_style.set_symbols(symbols)
- for arg in symbols:
- m = self.out_msg(fname, arg.name, arg)
+ if self.test_file:
+ self.test_file.set_filter(export, internal, symbol, nosymbol,
+ function_table, enable_lineno,
+ no_doc_sections)
- if m is None:
- ln = arg.get("ln", 0)
- dtype = arg.get('type', "")
+ self.test_file.output_symbols(fname, symbols,
+ self.source.get(fname))
- self.config.log.warning("%s:%d Can't handle %s",
- fname, ln, dtype)
- else:
- msg += m
+ continue
+ self.out_style.set_filter(export, internal, symbol, nosymbol,
+ function_table, enable_lineno,
+ no_doc_sections)
+
+ msg = self.out_style.output_symbols(fname, symbols)
if msg:
yield fname, msg
+
+ if self.test_file:
+ self.test_file.write()
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
index 2b8a93f79716f..fe08cac861c29 100644
--- a/tools/lib/python/kdoc/kdoc_item.py
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -22,15 +22,34 @@ class KdocItem:
self.sections = {}
self.sections_start_lines = {}
self.parameterlist = []
- self.parameterdesc_start_lines = []
+ self.parameterdesc_start_lines = {}
self.parameterdescs = {}
self.parametertypes = {}
+
+ self.warnings = []
+
#
# Just save everything else into our own dict so that the output
# side can grab it directly as before. As we move things into more
# structured data, this will, hopefully, fade away.
#
- self.other_stuff = other_stuff
+ known_keys = {
+ 'declaration_start_line',
+ 'sections',
+ 'sections_start_lines',
+ 'parameterlist',
+ 'parameterdesc_start_lines',
+ 'parameterdescs',
+ 'parametertypes',
+ 'warnings',
+ }
+
+ self.other_stuff = {}
+ for k, v in other_stuff.items():
+ if k in known_keys:
+ setattr(self, k, v) # real attribute
+ else:
+ self.other_stuff[k] = v
def get(self, key, default = None):
"""
@@ -41,6 +60,20 @@ class KdocItem:
def __getitem__(self, key):
return self.get(key)
+    @classmethod
+    def from_dict(cls, d):
+        """
+        Create a KdocItem from a plain dict.
+
+        ``name``, ``fname`` and ``type`` (default: ``None``) and
+        ``start_line`` (default: 1) are taken out of a copy of *d* and
+        passed positionally. Every other key, together with the content of
+        an optional ``other_stuff`` dict, is forwarded as a keyword
+        argument. *d* itself is not modified.
+        """
+
+        # Pop from a shallow copy, leaving the caller's dict untouched
+        remaining = d.copy()
+
+        positional = (
+            remaining.pop('name', None),
+            remaining.pop('fname', None),
+            remaining.pop('type', None),
+            remaining.pop('start_line', 1),
+        )
+        extra = remaining.pop('other_stuff', {})
+
+        # Whatever is left goes straight to __init__ as keywords
+        return cls(*positional, **remaining, **extra)
+
#
# Tracking of section and parameter information.
#
@@ -49,7 +82,7 @@ class KdocItem:
Set sections and start lines.
"""
self.sections = sections
- self.section_start_lines = start_lines
+ self.sections_start_lines = start_lines
def set_params(self, names, descs, types, starts):
"""
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 08539dd92cbb4..1b54117dbe19c 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -222,6 +222,27 @@ class OutputFormat:
return None
+    def output_symbols(self, fname, symbols):
+        """
+        Produce the output for a set of KdocItem symbols.
+
+        Registers *symbols* with set_symbols() and then calls msg() for
+        each of them. A symbol for which msg() returns ``None`` is reported
+        through the config logger and skipped.
+
+        Returns the concatenation of all produced messages, which is an
+        empty string when nothing was generated.
+        """
+        self.set_symbols(symbols)
+
+        chunks = []
+        for item in symbols:
+            text = self.msg(fname, item.name, item)
+            if text is not None:
+                chunks.append(text)
+                continue
+
+            line_no = item.get("ln", 0)
+            kind = item.get('type', "")
+            self.config.log.warning("%s:%d Can't handle %s",
+                                    fname, line_no, kind)
+
+        return "".join(chunks)
+
# Virtual methods to be overridden by inherited classes
# At the base class, those do nothing.
def set_symbols(self, symbols):
@@ -368,7 +389,7 @@ class RestFormat(OutputFormat):
else:
self.data += f'{self.lineprefix}**{section}**\n\n'
- self.print_lineno(args.section_start_lines.get(section, 0))
+ self.print_lineno(args.sections_start_lines.get(section, 0))
self.output_highlight(text)
self.data += "\n"
self.data += "\n"
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index f6c4ee3b18c90..a10e64589d76e 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -140,7 +140,7 @@ class KernelEntry:
self.parametertypes = {}
self.parameterdesc_start_lines = {}
- self.section_start_lines = {}
+ self.sections_start_lines = {}
self.sections = {}
self.anon_struct_union = False
@@ -220,7 +220,7 @@ class KernelEntry:
self.sections[name] += '\n' + contents
else:
self.sections[name] = contents
- self.section_start_lines[name] = self.new_start_line
+ self.sections_start_lines[name] = self.new_start_line
self.new_start_line = 0
# self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
@@ -246,12 +246,13 @@ class KernelDoc:
#: String to write when a parameter is not described.
undescribed = "-- undescribed --"
- def __init__(self, config, fname, xforms):
+ def __init__(self, config, fname, xforms, store_src=False):
"""Initialize internal variables"""
self.fname = fname
self.config = config
self.xforms = xforms
+ self.store_src = store_src
tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
@@ -264,6 +265,9 @@ class KernelDoc:
# Place all potential outputs into an array
self.entries = []
+ # When store_src is true, the kernel-doc source content is stored here
+ self.source = None
+
#
# We need Python 3.7 for its "dicts remember the insertion
# order" guarantee
@@ -316,7 +320,7 @@ class KernelDoc:
for section in ["Description", "Return"]:
if section in sections and not sections[section].rstrip():
del sections[section]
- item.set_sections(sections, self.entry.section_start_lines)
+ item.set_sections(sections, self.entry.sections_start_lines)
item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
self.entry.parametertypes,
self.entry.parameterdesc_start_lines)
@@ -1592,6 +1596,15 @@ class KernelDoc:
state.DOCBLOCK: process_docblock,
}
+ def get_source(self):
+ """
+ Return the kernel-doc source content stored by the latest
+ parse_kdoc() run.
+
+ The value is ``None`` until parse_kdoc() runs, as that is what the
+ constructor stores. parse_kdoc() replaces it with a list, where each
+ element is a dict with the line number where a kernel-doc comment
+ starts (``ln``) and the lines collected for it (``data``).
+
+ NOTE(review): collecting the source is meant to be controlled by the
+ ``store_src`` constructor argument - confirm that the list stays
+ empty when it is false.
+ """
+ return self.source
+
def parse_kdoc(self):
"""
Open and process each line of a C source file.
@@ -1605,6 +1618,8 @@ class KernelDoc:
prev = ""
prev_ln = None
export_table = set()
+ self.source = []
+ self.state = state.NORMAL
try:
with open(self.fname, "r", encoding="utf8",
@@ -1631,6 +1646,8 @@ class KernelDoc:
ln, state.name[self.state],
line)
+ prev_state = self.state
+
# This is an optimization over the original script.
# There, when export_file was used for the same file,
# it was read twice. Here, we use the already-existing
@@ -1641,6 +1658,14 @@ class KernelDoc:
# Hand this line to the appropriate state handler
self.state_actions[self.state](self, ln, line)
+                    # Only collect source lines when store_src was requested.
+                    # The parentheses are required: "and" binds tighter than
+                    # "or", so without them every line handled outside the
+                    # NORMAL state would be stored even with store_src
+                    # disabled.
+                    if self.store_src and (prev_state != self.state or
+                                           self.state != state.NORMAL):
+                        if self.state == state.NAME:
+                            # A "/**" was detected. Add a new source element
+                            self.source.append({"ln": ln, "data": line + "\n"})
+                        else:
+                            # Append to the existing one
+                            self.source[-1]["data"] += line + "\n"
+
+
self.emit_unused_warnings()
except OSError:
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 6f3ae28859eaa..28292efe25a2c 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -70,10 +70,15 @@ class KernRe:
flags_name = " | ".join(flags)
+ max_len = 60
+ pattern = ""
+ for pos in range(0, len(self.regex.pattern), max_len):
+ pattern += '"' + self.regex.pattern[pos:max_len + pos] + '" '
+
if flags_name:
- return f'KernRe("{self.regex.pattern}", {flags_name})'
+ return f'KernRe({pattern}, {flags_name})'
else:
- return f'KernRe("{self.regex.pattern}")'
+ return f'KernRe({pattern})'
def __add__(self, other):
"""
diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py
new file mode 100644
index 0000000000000..db131503c3f6b
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_yaml_file.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+import os
+import sys
+
+from kdoc.kdoc_output import ManFormat, RestFormat
+
+
+class KDocTestFile():
+    """
+    Handles the logic needed to store kernel-doc output inside a YAML file.
+    Useful for unit tests and regression tests.
+
+    Each stored test holds the kernel-doc source of one symbol plus the
+    expected results selected by ``yaml_content``: the ``KdocItem``
+    attributes and/or the ``man`` and ``rst`` outputs.
+    """
+
+    def __init__(self, config, yaml_file, yaml_content):
+        """
+        Prepare the output classes and validate the destination.
+
+        ``config``
+            Configuration object handed to the output classes.
+
+        ``yaml_file``
+            Path of the YAML file to be written. ``~`` is expanded.
+
+        ``yaml_content``
+            Container with what will be stored: any of ``KdocItem``,
+            ``man`` and ``rst``. ``None`` is handled as an empty selection.
+
+        Exits the program if the yaml package is not available or if the
+        directory of ``yaml_file`` doesn't exist.
+        """
+        #
+        # Bail out early if yaml is not available
+        #
+        try:
+            import yaml
+        except ImportError:
+            sys.exit("Warning: yaml package not available. Aborting it.")
+
+        self.config = config
+        self.test_file = os.path.expanduser(yaml_file)
+
+        # The caller's default is None: map it to an empty selection, as
+        # otherwise the membership tests below would raise TypeError
+        self.yaml_content = yaml_content or []
+
+        self.tests = []
+
+        out_dir = os.path.dirname(self.test_file)
+        if out_dir and not os.path.isdir(out_dir):
+            sys.exit(f"Directory {out_dir} doesn't exist.")
+
+        # One output class instance per requested format, man first
+        self.out_style = []
+        for key, out_class in (("man", ManFormat), ("rst", RestFormat)):
+            if key in self.yaml_content:
+                out_style = out_class()
+                out_style.set_config(self.config)
+
+                self.out_style.append(out_style)
+
+    def set_filter(self, export, internal, symbol, nosymbol,
+                   function_table, enable_lineno, no_doc_sections):
+        """
+        Set filters at the output classes.
+
+        All arguments are passed unchanged to the set_filter() method of
+        each output class created by the constructor.
+        """
+        for out_style in self.out_style:
+            out_style.set_filter(export, internal, symbol,
+                                 nosymbol, function_table,
+                                 enable_lineno, no_doc_sections)
+
+    @staticmethod
+    def get_kdoc_item(arg, start_line=1):
+        """
+        Return the attributes of ``arg`` as a dict, with its line numbers
+        rebased so that the declaration starts at ``start_line``.
+
+        The returned dict is the one from ``vars(arg)``, so the changes
+        are done in place at ``arg``. When ``arg`` has no (or a zero)
+        ``declaration_start_line``, nothing is changed.
+        """
+
+        d = vars(arg)
+
+        declaration_start_line = d.get("declaration_start_line")
+        if not declaration_start_line:
+            return d
+
+        # Same shift for the declaration and for everything relative to it
+        offset = start_line - declaration_start_line
+        d["declaration_start_line"] = start_line
+
+        for attr in ("parameterdesc_start_lines", "sections_start_lines"):
+            start_lines = d.get(attr)
+            if start_lines:
+                for key in start_lines:
+                    start_lines[key] += offset
+
+        return d
+
+    def output_symbols(self, fname, symbols, source):
+        """
+        Store source, symbols and output strings at self.tests.
+
+        ``fname``
+            Name of the parsed file. Used to name the tests.
+
+        ``symbols``
+            Sequence with the KdocItem symbols found at the file.
+
+        ``source``
+            Sequence of dicts with the line number (``ln``) and the
+            content (``data``) of each kernel-doc block, in the same order
+            as ``symbols``.
+
+        Nothing is stored when both are empty. If their lengths differ, a
+        warning is printed and the file is ignored.
+        """
+
+        if not symbols and not source:
+            return
+
+        if not source or len(symbols) != len(source):
+            print(f"Warning: lengths are different. Ignoring {fname}")
+
+            # Folding without line numbers is too hard.
+            # The right thing to do here to proceed would be to delete
+            # not-handled source blocks, as len(source) should be bigger
+            # than len(symbols)
+            return
+
+        base_name = "test_" + fname.replace(".", "_").replace("/", "_")
+
+        for i, (arg, src) in enumerate(zip(symbols, source)):
+            expected_dict = {}
+
+            #
+            # KdocItem needs to be converted into dicts
+            #
+            if "KdocItem" in self.yaml_content:
+                expected_dict["kdoc_item"] = self.get_kdoc_item(arg)
+
+            for out_style in self.out_style:
+                key = "man" if isinstance(out_style, ManFormat) else "rst"
+
+                expected_dict[key] = out_style.output_symbols(fname, [arg])
+
+            # Single quotes inside the replacement field: reusing the outer
+            # quote type there is a syntax error before Python 3.12
+            self.tests.append({
+                "name": f"{base_name}_{i:03d}",
+                "description": f"{fname} line {src['ln']}",
+                "fname": fname,
+                "source": src["data"],
+                "expected": [expected_dict],
+            })
+
+    def write(self):
+        """
+        Output the content of self.tests to self.test_file.
+        """
+        import yaml
+
+        data = {"tests": self.tests}
+
+        with open(self.test_file, "w", encoding="utf-8") as fp:
+            yaml.safe_dump(data, fp, sort_keys=False, default_style="|",
+                           default_flow_style=False, allow_unicode=True)