1 files changed, 91 insertions, 201 deletions
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index ca00695b47b3..74af7ae47aa4 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,8 @@ import sys
 import re
 from pprint import pformat
 
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.c_lex import CTokenizer, tokenizer_set_log
+from kdoc.kdoc_re import KernRe
 from kdoc.kdoc_item import KdocItem
 
 #
@@ -70,140 +71,9 @@ doc_begin_func = KernRe(str(doc_com) +			# initial " * '
                         cache = False)
 
 #
-# Here begins a long set of transformations to turn structure member prefixes
-# and macro invocations into something we can parse and generate kdoc for.
-#
-struct_args_pattern = r'([^,)]+)'
-
-struct_xforms = [
-    # Strip attributes
-    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
-    (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
-    (KernRe(r'\s*__packed\s*', re.S), ' '),
-    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
-    (KernRe(r'\s*__private', re.S), ' '),
-    (KernRe(r'\s*__rcu', re.S), ' '),
-    (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
-    (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
-    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
-    #
-    # Unwrap struct_group macros based on this definition:
-    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
-    # which has variants like: struct_group(NAME, MEMBERS...)
-    # Only MEMBERS arguments require documentation.
-    #
-    # Parsing them happens on two steps:
-    #
-    # 1. drop struct group arguments that aren't at MEMBERS,
-    #    storing them as STRUCT_GROUP(MEMBERS)
-    #
-    # 2. remove STRUCT_GROUP() ancillary macro.
-    #
-    # The original logic used to remove STRUCT_GROUP() using an
-    # advanced regex:
-    #
-    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
-    #
-    # with two patterns that are incompatible with
-    # Python re module, as it has:
-    #
-    #   - a recursive pattern: (?1)
-    #   - an atomic grouping: (?>...)
-    #
-    # I tried a simpler version: but it didn't work either:
-    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
-    #
-    # As it doesn't properly match the end parenthesis on some cases.
-    #
-    # So, a better solution was crafted: there's now a NestedMatch
-    # class that ensures that delimiters after a search are properly
-    # matched. So, the implementation to drop STRUCT_GROUP() will be
-    # handled in separate.
-    #
-    (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
-    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
-    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
-    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
-    #
-    # Replace macros
-    #
-    # TODO: use NestedMatch for FOO($1, $2, ...) matches
-    #
-    # it is better to also move those to the NestedMatch logic,
-    # to ensure that parentheses will be properly matched.
-    #
-    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
-     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
-    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
-     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
-    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
-            re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
-    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
-            re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
-    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
-            r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
-            struct_args_pattern + r'\)', re.S), r'\2 *\1'),
-    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
-            struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
-    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
-    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
-]
-#
-# Regexes here are guaranteed to have the end delimiter matching
-# the start delimiter. Yet, right now, only one replace group
-# is allowed.
-#
-struct_nested_prefixes = [
-    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
-]
-
-#
-# Transforms for function prototypes
-#
-function_xforms  = [
-    (KernRe(r"^static +"), ""),
-    (KernRe(r"^extern +"), ""),
-    (KernRe(r"^asmlinkage +"), ""),
-    (KernRe(r"^inline +"), ""),
-    (KernRe(r"^__inline__ +"), ""),
-    (KernRe(r"^__inline +"), ""),
-    (KernRe(r"^__always_inline +"), ""),
-    (KernRe(r"^noinline +"), ""),
-    (KernRe(r"^__FORTIFY_INLINE +"), ""),
-    (KernRe(r"__init +"), ""),
-    (KernRe(r"__init_or_module +"), ""),
-    (KernRe(r"__exit +"), ""),
-    (KernRe(r"__deprecated +"), ""),
-    (KernRe(r"__flatten +"), ""),
-    (KernRe(r"__meminit +"), ""),
-    (KernRe(r"__must_check +"), ""),
-    (KernRe(r"__weak +"), ""),
-    (KernRe(r"__sched +"), ""),
-    (KernRe(r"_noprof"), ""),
-    (KernRe(r"__always_unused *"), ""),
-    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
-    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
-    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
-    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
-    (KernRe(r"__attribute_const__ +"), ""),
-    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
-]
-
-#
 # Ancillary functions
 #
 
-def apply_transforms(xforms, text):
-    """
-    Apply a set of transforms to a block of text.
-    """
-    for search, subst in xforms:
-        text = search.sub(subst, text)
-    return text
-
 multi_space = KernRe(r'\s\s+')
 def trim_whitespace(s):
     """
@@ -215,15 +85,9 @@ def trim_private_members(text):
     """
     Remove ``struct``/``enum`` members that have been marked "private".
     """
-    # First look for a "public:" block that ends a private region, then
-    # handle the "private until the end" case.
-    #
-    text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
-    text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
-    #
-    # We needed the comments to do the above, but now we can take them out.
-    #
-    return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+    tokens = CTokenizer(text)
+    return str(tokens)
 
 class state:
     """
@@ -276,7 +140,7 @@ class KernelEntry:
         self.parametertypes = {}
         self.parameterdesc_start_lines = {}
 
-        self.section_start_lines = {}
+        self.sections_start_lines = {}
         self.sections = {}
 
         self.anon_struct_union = False
@@ -356,7 +220,7 @@ class KernelEntry:
                 self.sections[name] += '\n' + contents
             else:
                 self.sections[name] = contents
-                self.section_start_lines[name] = self.new_start_line
+                self.sections_start_lines[name] = self.new_start_line
                 self.new_start_line = 0
 
 #        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
@@ -382,11 +246,15 @@ class KernelDoc:
     #: String to write when a parameter is not described.
     undescribed = "-- undescribed --"
 
-    def __init__(self, config, fname):
+    def __init__(self, config, fname, xforms, store_src=False):
         """Initialize internal variables"""
 
         self.fname = fname
         self.config = config
+        self.xforms = xforms
+        self.store_src = store_src
+
+        tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
 
         # Initial state for the state machines
         self.state = state.NORMAL
@@ -449,7 +317,7 @@ class KernelDoc:
         for section in ["Description", "Return"]:
             if section in sections and not sections[section].rstrip():
                 del sections[section]
-        item.set_sections(sections, self.entry.section_start_lines)
+        item.set_sections(sections, self.entry.sections_start_lines)
         item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
                         self.entry.parametertypes,
                         self.entry.parameterdesc_start_lines)
@@ -849,13 +717,15 @@ class KernelDoc:
         return declaration
 
 
-    def dump_struct(self, ln, proto):
+    def dump_struct(self, ln, proto, source):
         """
         Store an entry for a ``struct`` or ``union``
         """
         #
         # Do the basic parse to get the pieces of the declaration.
         #
+        source = source
+        proto = trim_private_members(proto)
         struct_parts = self.split_struct_proto(proto)
         if not struct_parts:
             self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
@@ -869,12 +739,8 @@ class KernelDoc:
         #
         # Go through the list of members applying all of our transformations.
         #
-        members = trim_private_members(members)
-        members = apply_transforms(struct_xforms, members)
+        members = self.xforms.apply("struct", members)
 
-        nested = NestedMatch()
-        for search, sub in struct_nested_prefixes:
-            members = nested.sub(search, sub, members)
         #
         # Deal with embedded struct and union members, and drop enums entirely.
         #
@@ -888,10 +754,11 @@ class KernelDoc:
                                    declaration_name)
         self.check_sections(ln, declaration_name, decl_type)
         self.output_declaration(decl_type, declaration_name,
+                                source=source,
                                 definition=self.format_struct_decl(declaration),
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_enum(self, ln, proto):
+    def dump_enum(self, ln, proto, source):
         """
         Store an ``enum`` inside self.entries array.
         """
@@ -899,6 +766,8 @@ class KernelDoc:
         # Strip preprocessor directives.  Note that this depends on the
         # trailing semicolon we added in process_proto_type().
         #
+        source = source
+        proto = trim_private_members(proto)
         proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
         #
         # Parse out the name and members of the enum.  Typedef form first.
@@ -906,7 +775,7 @@ class KernelDoc:
         r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
         if r.search(proto):
             declaration_name = r.group(2)
-            members = trim_private_members(r.group(1))
+            members = r.group(1)
         #
         # Failing that, look for a straight enum
         #
@@ -914,7 +783,7 @@ class KernelDoc:
             r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
             if r.match(proto):
                 declaration_name = r.group(1)
-                members = trim_private_members(r.group(2))
+                members = r.group(2)
         #
         # OK, this isn't going to work.
         #
@@ -943,9 +812,10 @@ class KernelDoc:
         member_set = set()
         members = KernRe(r'\([^;)]*\)').sub('', members)
         for arg in members.split(','):
-            if not arg:
-                continue
             arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+            if not arg.strip():
+                continue
+
             self.entry.parameterlist.append(arg)
             if arg not in self.entry.parameterdescs:
                 self.entry.parameterdescs[arg] = self.undescribed
@@ -961,29 +831,23 @@ class KernelDoc:
                               f"Excess enum value '@{k}' description in '{declaration_name}'")
 
         self.output_declaration('enum', declaration_name,
+                                source=source,
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_var(self, ln, proto):
+    def dump_var(self, ln, proto, source):
         """
         Store variables that are part of kAPI.
         """
         VAR_ATTRIBS = [
             "extern",
+            "const",
         ]
-        OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"
-
-        sub_prefixes = [
-            (KernRe(r"__read_mostly"), ""),
-            (KernRe(r"__ro_after_init"), ""),
-            (KernRe(r"(?://.*)$"), ""),
-            (KernRe(r"(?:/\*.*\*/)"), ""),
-            (KernRe(r";$"), ""),
-            (KernRe(r"=.*"), ""),
-        ]
+        OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
 
         #
         # Store the full prototype before modifying it
         #
+        source = source
         full_proto = proto
         declaration_name = None
 
@@ -1004,8 +868,7 @@ class KernelDoc:
         # Drop comments and macros to have a pure C prototype
         #
         if not declaration_name:
-            for r, sub in sub_prefixes:
-                proto = r.sub(sub, proto)
+            proto = self.xforms.apply("var", proto)
 
         proto = proto.rstrip()
 
@@ -1015,17 +878,17 @@ class KernelDoc:
 
         default_val = None
 
-        r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+        r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
         if r.match(proto):
             if not declaration_name:
                 declaration_name = r.group(1)
 
             default_val = r.group(2)
         else:
-            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
-        if r.match(proto):
-            default_val = r.group(1)
+            r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
 
+            if r.match(proto):
+                default_val = r.group(1)
         if not declaration_name:
            self.emit_msg(ln,f"{proto}: can't parse variable")
            return
@@ -1034,39 +897,38 @@ class KernelDoc:
             default_val = default_val.lstrip("=").strip()
 
         self.output_declaration("var", declaration_name,
+                                source=source,
                                 full_proto=full_proto,
                                 default_val=default_val,
                                 purpose=self.entry.declaration_purpose)
 
-    def dump_declaration(self, ln, prototype):
+    def dump_declaration(self, ln, prototype, source):
         """
         Store a data declaration inside self.entries array.
         """
 
         if self.entry.decl_type == "enum":
-            self.dump_enum(ln, prototype)
+            self.dump_enum(ln, prototype, source)
         elif self.entry.decl_type == "typedef":
-            self.dump_typedef(ln, prototype)
+            self.dump_typedef(ln, prototype, source)
         elif self.entry.decl_type in ["union", "struct"]:
-            self.dump_struct(ln, prototype)
+            self.dump_struct(ln, prototype, source)
         elif self.entry.decl_type == "var":
-            self.dump_var(ln, prototype)
+            self.dump_var(ln, prototype, source)
         else:
             # This would be a bug
             self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
 
-    def dump_function(self, ln, prototype):
+    def dump_function(self, ln, prototype, source):
         """
         Store a function or function macro inside self.entries array.
         """
 
+        source = source
         found = func_macro = False
         return_type = ''
         decl_type = 'function'
-        #
-        # Apply the initial transformations.
-        #
-        prototype = apply_transforms(function_xforms, prototype)
+
         #
         # If we have a macro, remove the "#define" at the front.
         #
@@ -1085,6 +947,11 @@ class KernelDoc:
                 declaration_name = r.group(1)
                 func_macro = True
                 found = True
+        else:
+            #
+            # Apply the initial transformations.
+            #
+            prototype = self.xforms.apply("func", prototype)
 
         # Yes, this truly is vile.  We are looking for:
         # 1. Return type (may be nothing if we're looking at a macro)
@@ -1150,13 +1017,14 @@ class KernelDoc:
         # Store the result.
         #
         self.output_declaration(decl_type, declaration_name,
+                                source=source,
                                 typedef=('typedef' in return_type),
                                 functiontype=return_type,
                                 purpose=self.entry.declaration_purpose,
                                 func_macro=func_macro)
 
 
-    def dump_typedef(self, ln, proto):
+    def dump_typedef(self, ln, proto, source):
         """
         Store a ``typedef`` inside self.entries array.
         """
@@ -1167,6 +1035,8 @@ class KernelDoc:
         typedef_ident = r'\*?\s*(\w\S+)\s*'
         typedef_args = r'\s*\((.*)\);'
 
+        source = source
+
         typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
         typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
 
@@ -1187,6 +1057,7 @@ class KernelDoc:
             self.create_parameter_list(ln, 'function', args, ',', declaration_name)
 
             self.output_declaration('function', declaration_name,
+                                    source=source,
                                     typedef=True,
                                     functiontype=return_type,
                                     purpose=self.entry.declaration_purpose)
@@ -1204,6 +1075,7 @@ class KernelDoc:
                 return
 
             self.output_declaration('typedef', declaration_name,
+                                    source=source,
                                     purpose=self.entry.declaration_purpose)
             return
 
@@ -1241,7 +1113,7 @@ class KernelDoc:
         function_set.add(symbol)
         return True
 
-    def process_normal(self, ln, line):
+    def process_normal(self, ln, line, source):
         """
         STATE_NORMAL: looking for the ``/**`` to begin everything.
         """
@@ -1255,7 +1127,7 @@ class KernelDoc:
         # next line is always the function name
         self.state = state.NAME
 
-    def process_name(self, ln, line):
+    def process_name(self, ln, line, source):
         """
         STATE_NAME: Looking for the "name - description" line
         """
@@ -1388,7 +1260,7 @@ class KernelDoc:
         return False
 
 
-    def process_decl(self, ln, line):
+    def process_decl(self, ln, line, source):
         """
         STATE_DECLARATION: We've seen the beginning of a declaration.
         """
@@ -1417,7 +1289,7 @@ class KernelDoc:
             self.emit_msg(ln, f"bad line: {line}")
 
 
-    def process_special(self, ln, line):
+    def process_special(self, ln, line, source):
         """
         STATE_SPECIAL_SECTION: a section ending with a blank line.
         """
@@ -1468,7 +1340,7 @@ class KernelDoc:
             # Unknown line, ignore
             self.emit_msg(ln, f"bad line: {line}")
 
-    def process_body(self, ln, line):
+    def process_body(self, ln, line, source):
         """
         STATE_BODY: the bulk of a kerneldoc comment.
         """
@@ -1482,7 +1354,7 @@ class KernelDoc:
             # Unknown line, ignore
             self.emit_msg(ln, f"bad line: {line}")
 
-    def process_inline_name(self, ln, line):
+    def process_inline_name(self, ln, line, source):
         """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
 
         if doc_inline_sect.search(line):
@@ -1495,9 +1367,15 @@ class KernelDoc:
         elif doc_content.search(line):
             self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
             self.state = state.PROTO
+
+            #
+            # Don't let a partial comment be added to the code, as that
+            # breaks the logic meant to remove comments from prototypes.
+            #
+            self.process_proto_type(ln, "/**\n" + line, source)
         # else ... ??
 
-    def process_inline_text(self, ln, line):
+    def process_inline_text(self, ln, line, source):
         """STATE_INLINE_TEXT: docbook comments within a prototype."""
 
         if doc_inline_end.search(line):
@@ -1583,7 +1461,7 @@ class KernelDoc:
 
         return proto
 
-    def process_proto_function(self, ln, line):
+    def process_proto_function(self, ln, line, source):
         """Ancillary routine to process a function prototype."""
 
         # strip C99-style comments to end of line
@@ -1625,10 +1503,10 @@ class KernelDoc:
             #
             # ... and we're done
             #
-            self.dump_function(ln, self.entry.prototype)
+            self.dump_function(ln, self.entry.prototype, source)
             self.reset_state(ln)
 
-    def process_proto_type(self, ln, line):
+    def process_proto_type(self, ln, line, source):
         """
         Ancillary routine to process a type.
         """
@@ -1658,7 +1536,7 @@ class KernelDoc:
                 elif chunk == '}':
                     self.entry.brcount -= 1
                 elif chunk == ';' and self.entry.brcount <= 0:
-                    self.dump_declaration(ln, self.entry.prototype)
+                    self.dump_declaration(ln, self.entry.prototype, source)
                     self.reset_state(ln)
                     return
         #
@@ -1667,7 +1545,7 @@ class KernelDoc:
         #
         self.entry.prototype += ' '
 
-    def process_proto(self, ln, line):
+    def process_proto(self, ln, line, source):
         """STATE_PROTO: reading a function/whatever prototype."""
 
         if doc_inline_oneline.search(line):
@@ -1679,17 +1557,18 @@ class KernelDoc:
             self.state = state.INLINE_NAME
 
         elif self.entry.decl_type == 'function':
-            self.process_proto_function(ln, line)
+            self.process_proto_function(ln, line, source)
 
         else:
-            self.process_proto_type(ln, line)
+            self.process_proto_type(ln, line, source)
 
-    def process_docblock(self, ln, line):
+    def process_docblock(self, ln, line, source):
         """STATE_DOCBLOCK: within a ``DOC:`` block."""
 
         if doc_end.search(line):
             self.dump_section()
-            self.output_declaration("doc", self.entry.identifier)
+            self.output_declaration("doc", self.entry.identifier,
+                                    source=source)
             self.reset_state(ln)
 
         elif doc_content.search(line):
@@ -1740,6 +1619,8 @@ class KernelDoc:
         prev = ""
         prev_ln = None
         export_table = set()
+        self.state = state.NORMAL
+        source = ""
 
         try:
             with open(self.fname, "r", encoding="utf8",
@@ -1766,6 +1647,12 @@ class KernelDoc:
                                           ln, state.name[self.state],
                                           line)
 
+                    if self.store_src:
+                        if source and self.state == state.NORMAL:
+                            source = ""
+                        elif self.state != state.NORMAL:
+                            source += line + "\n"
+
                     # This is an optimization over the original script.
                     # There, when export_file was used for the same file,
                     # it was read twice. Here, we use the already-existing
@@ -1773,8 +1660,11 @@ class KernelDoc:
                     #
                     if (self.state != state.NORMAL) or \
                        not self.process_export(export_table, line):
+                        prev_state = self.state
                         # Hand this line to the appropriate state handler
-                        self.state_actions[self.state](self, ln, line)
+                        self.state_actions[self.state](self, ln, line, source)
+                        if prev_state == state.NORMAL and self.state != state.NORMAL:
+                            source += line + "\n"
 
             self.emit_unused_warnings()