diff options
| author | Jonathan Corbet <corbet@lwn.net> | 2026-03-22 15:25:08 -0600 |
|---|---|---|
| committer | Jonathan Corbet <corbet@lwn.net> | 2026-03-22 15:25:08 -0600 |
| commit | 0a1a27776ddf0072883cdb4a61b91155553fcb96 (patch) | |
| tree | 278b2bec91d7c05f9c3132da36256b1d433b35ab /tools/lib/python | |
| parent | 781171bec0650c00c642564afcb5cce57abda5bf (diff) | |
| parent | 01d6d7bf9672f1aeabbffaa3fbfb8017223ff878 (diff) | |
Merge branch 'mauro' into docs-mw
This series comes after:
https://lore.kernel.org/linux-doc/cover.1773770483.git.mchehab+huawei@kernel.org/
It basically contains patches that I previously submitted as part of a
40+ patch series, but which were less relevant, plus a couple of other minor fixes:
- patch 1 improves one of the CTokenizer unit tests, fixing some
potential issues in it;
- patches 2 and 3 contain some improvements/fixes for the Sphinx
Python autodoc extension. They basically document c_lex.py;
- The remaining patches:
- create a new class for kernel-doc config;
- fix some internal representations of KdocItem;
- add unit tests for KernelDoc() parser class;
- add support for outputting KdocItem in YAML, which is a
machine-readable format covering all documented kAPI.
None of the patches should affect man or html output.
Diffstat (limited to 'tools/lib/python')
| -rw-r--r-- | tools/lib/python/kdoc/c_lex.py | 9 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_files.py | 104 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_item.py | 39 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_output.py | 23 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_parser.py | 33 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_re.py | 9 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_yaml_file.py | 155 |
7 files changed, 325 insertions, 47 deletions
diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py index b6d58bd470a9b..e01b154f458ec 100644 --- a/tools/lib/python/kdoc/c_lex.py +++ b/tools/lib/python/kdoc/c_lex.py @@ -336,13 +336,14 @@ class CTokenArgs: self.sub_tokeninzer = CTokenizer(sub_str) def groups(self, new_tokenizer): - """ + r""" Create replacement arguments for backrefs like: - ``\0``, ``\1``, ``\2``, ...``\n`` + ``\0``, ``\1``, ``\2``, ... ``\{number}`` - It also accepts a ``+`` character to the highest backref. When used, - it means in practice to ignore delimins after it, being greedy. + It also accepts a ``+`` character to the highest backref, like + ``\4+``. When used, the backref will be greedy, picking all other + arguments afterwards. The logic is smart enough to only go up to the maximum required argument, even if there are more. diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 8c20596239495..5a299ed44d621 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -9,7 +9,6 @@ Classes for navigating through the files that kernel-doc needs to handle to generate documentation. """ -import argparse import logging import os import re @@ -17,6 +16,7 @@ import re from kdoc.kdoc_parser import KernelDoc from kdoc.xforms_lists import CTransforms from kdoc.kdoc_output import OutputFormat +from kdoc.kdoc_yaml_file import KDocTestFile class GlobSourceFiles: @@ -87,6 +87,28 @@ class GlobSourceFiles: file_not_found_cb(fname) +class KdocConfig(): + """ + Stores all configuration attributes that kdoc_parser and kdoc_output + needs. 
+ """ + def __init__(self, verbose=False, werror=False, wreturn=False, + wshort_desc=False, wcontents_before_sections=False, + logger=None): + + self.verbose = verbose + self.werror = werror + self.wreturn = wreturn + self.wshort_desc = wshort_desc + self.wcontents_before_sections = wcontents_before_sections + + if logger: + self.log = logger + else: + self.log = logging.getLogger(__file__) + + self.warning = self.log.warning + class KernelFiles(): """ Parse kernel-doc tags on multiple kernel source files. @@ -131,6 +153,12 @@ class KernelFiles(): If not specified, defaults to use: ``logging.getLogger("kernel-doc")`` + ``yaml_file`` + If defined, stores the output inside a YAML file. + + ``yaml_content`` + Defines what will be inside the YAML file. + Note: There are two type of parsers defined here: @@ -160,7 +188,12 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname, self.xforms) + if self.test_file: + store_src = True + else: + store_src = False + + doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table @@ -170,6 +203,10 @@ class KernelFiles(): self.results[fname] = entries + source = doc.get_source() + if source: + self.source[fname] = source + def process_export_file(self, fname): """ Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file. @@ -199,7 +236,7 @@ class KernelFiles(): def __init__(self, verbose=False, out_style=None, xforms=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, - logger=None): + yaml_file=None, yaml_content=None, logger=None): """ Initialize startup variables and parse all files. 
""" @@ -224,37 +261,40 @@ class KernelFiles(): if kdoc_werror: werror = kdoc_werror + if not logger: + logger = logging.getLogger("kernel-doc") + else: + logger = logger + # Some variables are global to the parser logic as a whole as they are # used to send control configuration to KernelDoc class. As such, # those variables are read-only inside the KernelDoc. - self.config = argparse.Namespace + self.config = KdocConfig(verbose, werror, wreturn, wshort_desc, + wcontents_before_sections, logger) + + # Override log warning, as we want to count errors + self.config.warning = self.warning - self.config.verbose = verbose - self.config.werror = werror - self.config.wreturn = wreturn - self.config.wshort_desc = wshort_desc - self.config.wcontents_before_sections = wcontents_before_sections + if yaml_file: + self.test_file = KDocTestFile(self.config, yaml_file, yaml_content) + else: + self.test_file = None if xforms: self.xforms = xforms else: self.xforms = CTransforms() - if not logger: - self.config.log = logging.getLogger("kernel-doc") - else: - self.config.log = logger - - self.config.warning = self.warning - self.config.src_tree = os.environ.get("SRCTREE", None) # Initialize variables that are internal to KernelFiles self.out_style = out_style + self.out_style.set_config(self.config) self.errors = 0 self.results = {} + self.source = {} self.files = set() self.export_files = set() @@ -294,8 +334,6 @@ class KernelFiles(): returning kernel-doc markups on each interaction. 
""" - self.out_style.set_config(self.config) - if not filenames: filenames = sorted(self.results.keys()) @@ -315,29 +353,29 @@ class KernelFiles(): for s in symbol: function_table.add(s) - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - - msg = "" if fname not in self.results: self.config.log.warning("No kernel-doc for file %s", fname) continue symbols = self.results[fname] - self.out_style.set_symbols(symbols) - for arg in symbols: - m = self.out_msg(fname, arg.name, arg) + if self.test_file: + self.test_file.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) - if m is None: - ln = arg.get("ln", 0) - dtype = arg.get('type', "") + self.test_file.output_symbols(fname, symbols, + self.source.get(fname)) - self.config.log.warning("%s:%d Can't handle %s", - fname, ln, dtype) - else: - msg += m + continue + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + + msg = self.out_style.output_symbols(fname, symbols) if msg: yield fname, msg + + if self.test_file: + self.test_file.write() diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index 2b8a93f79716f..fe08cac861c29 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -22,15 +22,34 @@ class KdocItem: self.sections = {} self.sections_start_lines = {} self.parameterlist = [] - self.parameterdesc_start_lines = [] + self.parameterdesc_start_lines = {} self.parameterdescs = {} self.parametertypes = {} + + self.warnings = [] + # # Just save everything else into our own dict so that the output # side can grab it directly as before. As we move things into more # structured data, this will, hopefully, fade away. 
# - self.other_stuff = other_stuff + known_keys = { + 'declaration_start_line', + 'sections', + 'sections_start_lines', + 'parameterlist', + 'parameterdesc_start_lines', + 'parameterdescs', + 'parametertypes', + 'warnings', + } + + self.other_stuff = {} + for k, v in other_stuff.items(): + if k in known_keys: + setattr(self, k, v) # real attribute + else: + self.other_stuff[k] = v def get(self, key, default = None): """ @@ -41,6 +60,20 @@ class KdocItem: def __getitem__(self, key): return self.get(key) + @classmethod + def from_dict(cls, d): + """Create a KdocItem from a plain dict.""" + + cp = d.copy() + name = cp.pop('name', None) + fname = cp.pop('fname', None) + type = cp.pop('type', None) + start_line = cp.pop('start_line', 1) + other_stuff = cp.pop('other_stuff', {}) + + # Everything that’s left goes straight to __init__ + return cls(name, fname, type, start_line, **cp, **other_stuff) + # # Tracking of section and parameter information. # @@ -49,7 +82,7 @@ class KdocItem: Set sections and start lines. """ self.sections = sections - self.section_start_lines = start_lines + self.sections_start_lines = start_lines def set_params(self, names, descs, types, starts): """ diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 08539dd92cbb4..1b54117dbe19c 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -222,6 +222,27 @@ class OutputFormat: return None + def output_symbols(self, fname, symbols): + """ + Handles a set of KdocItem symbols. + """ + self.set_symbols(symbols) + + msg = "" + for arg in symbols: + m = self.msg(fname, arg.name, arg) + + if m is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + else: + msg += m + + return msg + # Virtual methods to be overridden by inherited classes # At the base class, those do nothing. 
def set_symbols(self, symbols): @@ -368,7 +389,7 @@ class RestFormat(OutputFormat): else: self.data += f'{self.lineprefix}**{section}**\n\n' - self.print_lineno(args.section_start_lines.get(section, 0)) + self.print_lineno(args.sections_start_lines.get(section, 0)) self.output_highlight(text) self.data += "\n" self.data += "\n" diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index f6c4ee3b18c90..a10e64589d76e 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -140,7 +140,7 @@ class KernelEntry: self.parametertypes = {} self.parameterdesc_start_lines = {} - self.section_start_lines = {} + self.sections_start_lines = {} self.sections = {} self.anon_struct_union = False @@ -220,7 +220,7 @@ class KernelEntry: self.sections[name] += '\n' + contents else: self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line + self.sections_start_lines[name] = self.new_start_line self.new_start_line = 0 # self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) @@ -246,12 +246,13 @@ class KernelDoc: #: String to write when a parameter is not described. 
undescribed = "-- undescribed --" - def __init__(self, config, fname, xforms): + def __init__(self, config, fname, xforms, store_src=False): """Initialize internal variables""" self.fname = fname self.config = config self.xforms = xforms + self.store_src = store_src tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") @@ -264,6 +265,9 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] + # When store_src is true, the kernel-doc source content is stored here + self.source = None + # # We need Python 3.7 for its "dicts remember the insertion # order" guarantee @@ -316,7 +320,7 @@ class KernelDoc: for section in ["Description", "Return"]: if section in sections and not sections[section].rstrip(): del sections[section] - item.set_sections(sections, self.entry.section_start_lines) + item.set_sections(sections, self.entry.sections_start_lines) item.set_params(self.entry.parameterlist, self.entry.parameterdescs, self.entry.parametertypes, self.entry.parameterdesc_start_lines) @@ -1592,6 +1596,15 @@ class KernelDoc: state.DOCBLOCK: process_docblock, } + def get_source(self): + """ + Return the file content of the lines handled by kernel-doc at the + latest parse_kdoc() run. + + Returns none if KernelDoc() was not initialized with store_src, + """ + return self.source + def parse_kdoc(self): """ Open and process each line of a C source file. @@ -1605,6 +1618,8 @@ class KernelDoc: prev = "" prev_ln = None export_table = set() + self.source = [] + self.state = state.NORMAL try: with open(self.fname, "r", encoding="utf8", @@ -1631,6 +1646,8 @@ class KernelDoc: ln, state.name[self.state], line) + prev_state = self.state + # This is an optimization over the original script. # There, when export_file was used for the same file, # it was read twice. 
Here, we use the already-existing @@ -1641,6 +1658,14 @@ class KernelDoc: # Hand this line to the appropriate state handler self.state_actions[self.state](self, ln, line) + if self.store_src and prev_state != self.state or self.state != state.NORMAL: + if self.state == state.NAME: + # A "/**" was detected. Add a new source element + self.source.append({"ln": ln, "data": line + "\n"}) + else: + # Append to the existing one + self.source[-1]["data"] += line + "\n" + self.emit_unused_warnings() except OSError: diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 6f3ae28859eaa..28292efe25a2c 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -70,10 +70,15 @@ class KernRe: flags_name = " | ".join(flags) + max_len = 60 + pattern = "" + for pos in range(0, len(self.regex.pattern), max_len): + pattern += '"' + self.regex.pattern[pos:max_len + pos] + '" ' + if flags_name: - return f'KernRe("{self.regex.pattern}", {flags_name})' + return f'KernRe({pattern}, {flags_name})' else: - return f'KernRe("{self.regex.pattern}")' + return f'KernRe({pattern})' def __add__(self, other): """ diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py new file mode 100644 index 0000000000000..db131503c3f6b --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_yaml_file.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>. + +import os + +from kdoc.kdoc_output import ManFormat, RestFormat + + +class KDocTestFile(): + """ + Handles the logic needed to store kernel‑doc output inside a YAML file. + Useful for unit tests and regression tests. + """ + + def __init__(self, config, yaml_file, yaml_content): + # + # Bail out early if yaml is not available + # + try: + import yaml + except ImportError: + sys.exit("Warning: yaml package not available. 
Aborting it.") + + self.config = config + self.test_file = os.path.expanduser(yaml_file) + self.yaml_content = yaml_content + + self.tests = [] + + out_dir = os.path.dirname(self.test_file) + if out_dir and not os.path.isdir(out_dir): + sys.exit(f"Directory {out_dir} doesn't exist.") + + self.out_style = [] + + if "man" in self.yaml_content: + out_style = ManFormat() + out_style.set_config(self.config) + + self.out_style.append(out_style) + + if "rst" in self.yaml_content: + out_style = RestFormat() + out_style.set_config(self.config) + + self.out_style.append(out_style) + + def set_filter(self, export, internal, symbol, nosymbol, + function_table, enable_lineno, no_doc_sections): + """ + Set filters at the output classes. + """ + for out_style in self.out_style: + out_style.set_filter(export, internal, symbol, + nosymbol, function_table, + enable_lineno, no_doc_sections) + + @staticmethod + def get_kdoc_item(arg, start_line=1): + + d = vars(arg) + + declaration_start_line = d.get("declaration_start_line") + if not declaration_start_line: + return d + + d["declaration_start_line"] = start_line + + parameterdesc_start_lines = d.get("parameterdesc_start_lines") + if parameterdesc_start_lines: + for key in parameterdesc_start_lines: + ln = parameterdesc_start_lines[key] + ln += start_line - declaration_start_line + + parameterdesc_start_lines[key] = ln + + sections_start_lines = d.get("sections_start_lines") + if sections_start_lines: + for key in sections_start_lines: + ln = sections_start_lines[key] + ln += start_line - declaration_start_line + + sections_start_lines[key] = ln + + return d + + def output_symbols(self, fname, symbols, source): + """ + Store source, symbols and output strings at self.tests. + """ + + # + # KdocItem needs to be converted into dicts + # + kdoc_item = [] + expected = [] + + if not symbols and not source: + return + + if not source or len(symbols) != len(source): + print(f"Warning: lengths are different. 
Ignoring {fname}") + + # Folding without line numbers is too hard. + # The right thing to do here to proceed would be to delete + # not-handled source blocks, as len(source) should be bigger + # than len(symbols) + return + + base_name = "test_" + fname.replace(".", "_").replace("/", "_") + expected_dict = {} + start_line=1 + + for i in range(0, len(symbols)): + arg = symbols[i] + + if "KdocItem" in self.yaml_content: + msg = self.get_kdoc_item(arg) + + expected_dict["kdoc_item"] = msg + + for out_style in self.out_style: + if isinstance(out_style, ManFormat): + key = "man" + else: + key = "rst" + + expected_dict[key]= out_style.output_symbols(fname, [arg]) + + name = f"{base_name}_{i:03d}" + + test = { + "name": name, + "description": f"{fname} line {source[i]["ln"]}", + "fname": fname, + "source": source[i]["data"], + "expected": [expected_dict] + } + + self.tests.append(test) + + expected_dict = {} + + def write(self): + """ + Output the content of self.tests to self.test_file. + """ + import yaml + + data = {"tests": self.tests} + + with open(self.test_file, "w", encoding="utf-8") as fp: + yaml.safe_dump(data, fp, sort_keys=False, default_style="|", + default_flow_style=False, allow_unicode=True) |
