Coverage for md_multiline_table / __init__.py: 88%
75 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-01 17:16 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-01 17:16 +0000
1# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR)
2# SPDX-License-Identifier: Apache-2.0
3#
4"""Multiline Table extension for Python Markdown."""
6from logging import getLogger
7import re
8from typing import Any
9import xml.etree.ElementTree as etree
11from markdown import Markdown
12from markdown.blockparser import BlockParser
13from markdown.extensions.tables import PIPE_LEFT, PIPE_NONE, PIPE_RIGHT, TableExtension, TableProcessor
class MultilineTableProcessor(TableProcessor):
    """
    Multiline Table Python Markdown extension markdown code block processor.

    Rows that start with ``:`` or end with ``|+`` are continuation rows: `_convert_table`
    merges their cells into the row directly above. The merged block is then handed to
    the parent `TableProcessor.run` for rendering.
    """

    # Right-hand border of a row: an unescaped "|", "|+" or ":" at the very end of the line.
    RE_END_BORDER = re.compile(r"(?<!\\)(?:\\\\)*(\|\+?|:)$")
    # Unescaped cell delimiters, compiled once instead of on every `re.split` call.
    _RE_PIPE_SPLIT = re.compile(r"(?<!\\)(?:\\\\)*\|")
    _RE_COLON_SPLIT = re.compile(r"(?<!\\)(?:\\\\)*:")

    def __init__(self, parser: BlockParser, config: dict[str, Any]) -> None:
        """
        Initialize new instance of the multiline table python extension markdown block processor.

        Args:
            parser (BlockParser): block parser
            config (dict[str, Any]): all configuration options
        """
        super().__init__(parser, config)
        self._logger = getLogger("MARKDOWN")

    def test(self, parent: etree.Element, block: str) -> bool:
        """
        Ensure that the first few rows contain a valid table head (column header and separator row).

        Unlike a plain markdown table, the separator row does not have to be the second
        row: the first row after the header that ends with ``|`` is taken as the separator
        row, so that continuation rows may sit between header and separator.

        On success `self.border` and `self.separator` are updated for the following `run` call.

        Args:
            parent (etree.Element): parent html element (not used by this check)
            block (str): markdown code

        Returns:
            bool: whether markdown code is a table or not
        """
        rows = [row.strip(" ") for row in block.split("\n")]
        if len(rows) < 2:
            return False

        header0 = rows[0]
        self.border = PIPE_NONE  # type: ignore[assignment]
        if header0.startswith("|"):
            self.border |= PIPE_LEFT  # type: ignore[assignment]
        if self.RE_END_BORDER.search(header0) is not None:
            self.border |= PIPE_RIGHT  # type: ignore[assignment]
        row0_len = len(self._split_row(header0))  # type: ignore[attr-defined]
        is_table = row0_len > 1

        # Each row in a single column table needs at least one pipe.
        if not is_table and row0_len == 1 and self.border:
            is_table = all(
                row.startswith("|") or self.RE_END_BORDER.search(row) is not None for row in rows[1:]
            )
        if not is_table:
            return False

        # A default is required here: without it `next` raises StopIteration for every
        # block whose first line contains a pipe but which has no later row ending with "|"
        # (e.g. an ordinary paragraph), which would abort the whole conversion.
        format_row = next((row for row in rows[1:] if row.endswith("|")), None)
        if format_row is None:
            return False

        separator = self._split_row(format_row)  # type: ignore[attr-defined]
        if len(separator) != row0_len or not set("".join(separator)) <= set("|:- "):
            return False
        self.separator = separator
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
        """
        Transform multiline table to standard markdown table.

        The first block is only replaced once the sanity checks have passed. When this
        method declines by returning ``False`` the block list is left untouched, so the
        block is not lost and stays available to whatever the caller does next.

        Args:
            parent (etree.Element): parent html element
            blocks (list[str]): markdown code blocks

        Returns:
            bool | None: ``False`` if the block was rejected, otherwise the result of the parent `run`
        """
        lines = blocks[0].split("\n")

        # sanity check which should never fail due to self.test(...)
        if len(lines) < 2:
            self._logger.warning("Broken table with less then 2 lines detected.")
            return False
        # The "- 2" drops the pieces before the first and after the last pipe,
        # i.e. the header is expected to carry both borders here.
        column_count = len(self._RE_PIPE_SPLIT.split(lines[0])) - 2
        if column_count < 1:
            self._logger.warning("Broken table with less then 1 column detected.")
            return False

        # convert multiline table to default markdown table format
        blocks[0] = self._convert_table(lines, column_count)
        return super().run(parent, blocks)

    def _convert_table(self, lines: list[str], column_count: int) -> str:
        """
        Merge every continuation row into the row above it.

        A continuation row either starts with ``:`` (cells are split at unescaped ``:``)
        or ends with ``|+`` (cells are split at unescaped ``|``). Its cells are appended,
        separated by a single space, to the matching cells of the previous row and the
        continuation row is removed. `lines` is modified in place.

        Args:
            lines (list[str]): rows of the table block
            column_count (int): expected number of columns; a mismatch is only logged

        Returns:
            str: table block with all continuation rows merged, joined by newlines
        """
        line_idx = 1
        while line_idx < len(lines):
            line = lines[line_idx]
            stripped = line.strip()
            ends_with_plus = stripped.endswith("|+")
            # `startswith` instead of `[0]` so that an empty row is skipped rather than
            # raising an IndexError.
            if not stripped.startswith(":") and not ends_with_plus:
                line_idx += 1
                continue

            splitter = self._RE_PIPE_SPLIT if ends_with_plus else self._RE_COLON_SPLIT
            columns = splitter.split(line)[1:-1]
            if len(columns) != column_count:
                self._logger.warning(
                    "Table column count mismatch. "
                    f"Row has {len(columns)} instead of the expected {column_count} columns."
                )

            columns_previous = self._RE_PIPE_SPLIT.split(lines[line_idx - 1])[1:-1]
            merged_cells = [
                " ".join(part.strip() for part in cell_parts).strip()
                for cell_parts in zip(columns_previous, columns, strict=False)
            ]
            lines[line_idx - 1] = "| " + " | ".join(merged_cells) + " |"
            # Do not advance: the row that moves up into this index may be a continuation row too.
            lines.pop(line_idx)

        return "\n".join(lines)
class MultilineTableExtension(TableExtension):
    """Python Markdown extension which registers the multiline table block processor."""

    def extendMarkdown(self, md: Markdown) -> None:
        """
        Register the multiline table block processor on the given markdown instance.

        Args:
            md (Markdown): markdown instance to extend
        """
        # Make sure both cell delimiters are listed in the instance's ESCAPED_CHARS.
        for delimiter in ("|", ":"):
            if delimiter not in md.ESCAPED_CHARS:
                md.ESCAPED_CHARS.append(delimiter)

        block_processor = MultilineTableProcessor(md.parser, self.getConfigs())
        md.parser.blockprocessors.register(block_processor, "md-multiline-table", 106)
def make_extension(*args, **kwargs) -> MultilineTableExtension:
    """
    Create a `MultilineTableExtension`, forwarding all arguments to its constructor.

    Returns:
        MultilineTableExtension: the newly created extension instance
    """
    extension = MultilineTableExtension(*args, **kwargs)
    return extension