Coverage for src/md_multiline_table/__init__.py: 88%
76 statements
« prev ^ index » next coverage.py v7.14.3, created at 2026-07-01 12:02 +0000
« prev ^ index » next coverage.py v7.14.3, created at 2026-07-01 12:02 +0000
1# SPDX-FileCopyrightText: 2026 German Aerospace Center (DLR)
2# SPDX-License-Identifier: Apache-2.0
3#
4"""Multiline Table extension for Python Markdown."""
6from logging import getLogger
7import re
8from typing import Any, no_type_check
9import xml.etree.ElementTree as etree
11from markdown import Markdown
12from markdown.blockparser import BlockParser
13from markdown.extensions.tables import PIPE_LEFT, PIPE_NONE, PIPE_RIGHT, TableExtension, TableProcessor
class MultilineTableProcessor(TableProcessor):
    """
    Multiline Table Python Markdown extension markdown code block processor.

    A continuation row either starts with ``:`` (cells separated by ``:``) or ends with ``|+``
    (cells separated by ``|``). The cells of a continuation row are appended to the cells of
    the row above it; the resulting standard table is then rendered by ``TableProcessor``.
    """

    # Unescaped trailing border of a row: "|", "|+" or ":".
    RE_END_BORDER = re.compile(r"(?<!\\)(?:\\\\)*(\|\+?|:)$")
    # Unescaped cell delimiters, compiled once instead of on every row.
    _RE_SPLIT_PIPE = re.compile(r"(?<!\\)(?:\\\\)*\|")
    _RE_SPLIT_COLON = re.compile(r"(?<!\\)(?:\\\\)*:")

    def __init__(self, parser: BlockParser, config: dict[str, Any]) -> None:
        """
        Initialize new instance of the multiline table python extension markdown block processor.

        Args:
            parser (BlockParser): block parser
            config (dict[str, Any]): all configuration options
        """
        super().__init__(parser, config)
        self._logger = getLogger("MARKDOWN")

    @no_type_check
    def test(self, parent: etree.Element, block: str) -> bool:
        """
        Ensure that the first few rows contain a valid table head (column header and separator row).

        The separator row does not have to be the second row, which allows multiline headers:
        the first row after the header that ends with ``|`` is taken as the separator row.
        On success ``self.border`` and ``self.separator`` are set for the later ``run`` call.

        Args:
            parent (etree.Element): parent html element
            block (str): markdown code

        Returns:
            bool: whether markdown code is a table or not
        """
        rows = [row.strip(" ") for row in block.split("\n")]
        if len(rows) < 2:
            return False

        header0 = rows[0]
        following_rows = rows[1:]

        self.border = PIPE_NONE
        if header0.startswith("|"):
            self.border |= PIPE_LEFT
        if self.RE_END_BORDER.search(header0) is not None:
            self.border |= PIPE_RIGHT
        row0_len = len(self._split_row(header0))
        is_table = row0_len > 1

        # Each row in a single column table needs at least one pipe.
        if not is_table and row0_len == 1 and self.border:
            is_table = all(
                row.startswith("|") or self.RE_END_BORDER.search(row) is not None for row in following_rows
            )

        if not is_table:
            return False

        # Without a default, next() raises StopIteration for blocks that have no row ending
        # with "|" (e.g. tables without a right border), which would abort the conversion.
        format_row = next((row for row in following_rows if row.endswith("|")), None)
        if format_row is None:
            return False

        separator = self._split_row(format_row)
        if len(separator) != row0_len or not set("".join(separator)) <= set("|:- "):
            return False

        self.separator = separator
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
        """
        Transform multiline table to standard markdown table.

        Args:
            parent (etree.Element): parent html element
            blocks (list[str]): markdown code blocks

        Returns:
            bool | None: ``False`` if the block could not be handled. In that case the block is
            put back unchanged so that the parser can offer it to the next block processor
            instead of losing its content. ``None`` otherwise.
        """
        block = blocks.pop(0)
        lines = block.split("\n")

        # sanity check which should never fail due to self.test(...)
        if len(lines) < 2:
            self._logger.warning("Broken table with less then 2 lines detected.")
            blocks.insert(0, block)
            return False
        # The header is expected to have a left and a right border, hence the two extra parts.
        column_count = len(self._RE_SPLIT_PIPE.split(lines[0])) - 2
        if column_count < 1:
            self._logger.warning("Broken table with less then 1 column detected.")
            blocks.insert(0, block)
            return False

        # convert multiline table to default markdown table format
        blocks.insert(0, self._convert_table(lines, column_count))
        super().run(parent, blocks)
        return None

    def _convert_table(self, lines: list[str], column_count: int) -> str:
        """
        Merge every continuation row into the row above it.

        Args:
            lines (list[str]): lines of the table block; modified in place
            column_count (int): expected number of columns, only used for the mismatch warning

        Returns:
            str: the table with all continuation rows merged, joined by newlines
        """
        line_idx = 1
        while line_idx < len(lines):
            line = lines[line_idx]
            stripped = line.strip()
            ends_with_pipe_plus = stripped.endswith("|+")
            # startswith() instead of indexing: a whitespace-only line must not raise IndexError.
            if not stripped.startswith(":") and not ends_with_pipe_plus:
                line_idx += 1
                continue

            splitter = self._RE_SPLIT_PIPE if ends_with_pipe_plus else self._RE_SPLIT_COLON
            columns = splitter.split(line)[1:-1]
            if len(columns) != column_count:
                self._logger.warning(
                    "Table column count mismatch. Row has %d instead of the expected %d columns.",
                    len(columns),
                    column_count,
                )

            columns_previous = self._RE_SPLIT_PIPE.split(lines[line_idx - 1])[1:-1]
            merged_cells = (
                " ".join(part.strip() for part in column_parts).strip()
                for column_parts in zip(columns_previous, columns, strict=False)
            )
            lines[line_idx - 1] = "| " + " | ".join(merged_cells) + " |"
            # The index is not advanced: the next line may be a further continuation
            # of the same (already merged) row.
            lines.pop(line_idx)

        return "\n".join(lines)
class MultilineTableExtension(TableExtension):
    """Python Markdown extension that enables multiline tables."""

    def extendMarkdown(self, md: Markdown) -> None:
        """
        Register the multiline table block processor on a markdown instance.

        The characters ``|`` and ``:`` are added to the escapable characters of the
        markdown instance if they are not listed yet.

        Args:
            md (Markdown): markdown instance to extend
        """
        for char in ("|", ":"):
            if char not in md.ESCAPED_CHARS:
                md.ESCAPED_CHARS.append(char)
        md.parser.blockprocessors.register(
            MultilineTableProcessor(md.parser, self.getConfigs()),
            "md-multiline-table",
            106,
        )
def make_extension(*_args, **kwargs) -> MultilineTableExtension:
    """
    Create a `MultilineTableExtension`.

    Arguments:
        _args: Positional arguments; they are not used.
        kwargs: Keyword arguments passed on to the extension.

    Returns:
        MultilineTableExtension: the newly created extension instance
    """
    extension = MultilineTableExtension(**kwargs)
    return extension