Coverage for md_multiline_table / __init__.py: 88%

75 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-01 17:16 +0000

1# SPDX-FileCopyrightText: 2025 German Aerospace Center (DLR) 

2# SPDX-License-Identifier: Apache-2.0 

3# 

4"""Multiline Table extension for Python Markdown.""" 

5 

6from logging import getLogger 

7import re 

8from typing import Any 

9import xml.etree.ElementTree as etree 

10 

11from markdown import Markdown 

12from markdown.blockparser import BlockParser 

13from markdown.extensions.tables import PIPE_LEFT, PIPE_NONE, PIPE_RIGHT, TableExtension, TableProcessor 

14 

15 

class MultilineTableProcessor(TableProcessor):
    """
    Multiline Table Python Markdown extension markdown code block processor.

    A physical line that starts with ``:`` or ends with ``|+`` is treated as a
    continuation of the previous table row. Before the block is handed to the
    parent ``TableProcessor.run``, every continuation line is merged cell by
    cell into the row above it, so the parent only sees a regular table.
    """

    # Matches an unescaped right border at the end of a row: ``|``, ``|+`` or ``:``.
    RE_END_BORDER = re.compile(r"(?<!\\)(?:\\\\)*(\|\+?|:)$")
    # Cell delimiters (unescaped ``|`` resp. ``:``) used when splitting rows.
    _RE_PIPE_SPLIT = re.compile(r"(?<!\\)(?:\\\\)*\|")
    _RE_COLON_SPLIT = re.compile(r"(?<!\\)(?:\\\\)*:")

    def __init__(self, parser: BlockParser, config: dict[str, Any]) -> None:
        """
        Initialize new instance of the multiline table python extension markdown block processor.

        Args:
            parser (BlockParser): block parser
            config (dict[str, Any]): all configuration options
        """
        super().__init__(parser, config)
        self._logger = getLogger("MARKDOWN")

    def test(self, parent: etree.Element, block: str) -> bool:
        """
        Ensure that the first few rows contains a valid table head (column header and separator row).

        This method mirrors the check of the parent class except that the separator row
        does not have to be in the second row to support multiline headers: the first
        row after the header that ends with ``|`` is taken as the separator row.

        As a side effect the detected border flags are stored in ``self.border`` and,
        when the block is accepted, the separator cells are stored in ``self.separator``.

        Args:
            parent (etree.Element): parent html element
            block (str): markdown code

        Returns:
            bool: whether markdown code is a table or not
        """
        rows = [row.strip(" ") for row in block.split("\n")]
        if len(rows) < 2:
            return False

        header0 = rows[0]
        self.border = PIPE_NONE  # type: ignore[assignment]
        if header0.startswith("|"):
            self.border |= PIPE_LEFT  # type: ignore[assignment]
        if self.RE_END_BORDER.search(header0) is not None:
            self.border |= PIPE_RIGHT  # type: ignore[assignment]
        row0_len = len(self._split_row(header0))  # type: ignore[attr-defined]
        is_table = row0_len > 1

        # Each row in a single column table needs at least one pipe.
        if not is_table and row0_len == 1 and self.border:
            is_table = all(
                row.startswith("|") or self.RE_END_BORDER.search(row) is not None for row in rows[1:]
            )

        if not is_table:
            return False

        # A block without any candidate separator row is not a table. Without the
        # default, ``next`` would raise StopIteration for ordinary text such as
        # "a | b\nc d" and abort the whole Markdown conversion.
        format_row = next((row for row in rows[1:] if row.endswith("|")), None)
        if format_row is None:
            return False

        separator = self._split_row(format_row)  # type: ignore[attr-defined]
        is_table = (len(separator) == row0_len) and set("".join(separator)) <= set("|:- ")
        if is_table:
            self.separator = separator
        return is_table

    def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
        """
        Transform multiline table to standard markdown table.

        The first block is removed from ``blocks``, converted and re-inserted at
        position 0 before the parent implementation is invoked. If a sanity check
        fails, the block has already been removed and ``False`` is returned.

        Args:
            parent (etree.Element): parent html element
            blocks (list[str]): markdown code blocks

        Returns:
            bool | None: success state
        """
        lines = blocks.pop(0).split("\n")

        # sanity check which should never fail due to self.test(...)
        if len(lines) < 2:
            self._logger.warning("Broken table with less then 2 lines detected.")
            return False
        # Splitting "| a | b |" yields one leading and one trailing fragment
        # outside the borders, hence the "- 2".
        column_count = len(self._RE_PIPE_SPLIT.split(lines[0])) - 2
        if column_count < 1:
            self._logger.warning("Broken table with less then 1 column detected.")
            return False

        # convert multiline table to default markdown table format
        blocks.insert(0, self._convert_table(lines, column_count))
        return super().run(parent, blocks)

    def _convert_table(self, lines: list[str], column_count: int) -> str:
        """
        Merge continuation lines into the row above and return the resulting table text.

        A line is a continuation when its stripped form starts with ``:`` or ends
        with ``|+``. Lines ending with ``|+`` are split on unescaped ``|``, all other
        continuation lines on unescaped ``:``. ``lines`` is modified in place.

        Args:
            lines (list[str]): lines of the table block
            column_count (int): number of columns expected in every row

        Returns:
            str: table text with every continuation line folded into its row
        """
        line_idx = 1
        while line_idx < len(lines):
            line = lines[line_idx]
            stripped = line.strip()
            pipe_continuation = stripped.endswith("|+")
            # ``startswith`` instead of ``[0]`` so that an empty line is skipped
            # rather than raising IndexError.
            if not (pipe_continuation or stripped.startswith(":")):
                line_idx += 1
                continue

            splitter = self._RE_PIPE_SPLIT if pipe_continuation else self._RE_COLON_SPLIT
            columns = splitter.split(line)[1:-1]
            if len(columns) != column_count:
                self._logger.warning(
                    "Table column count mismatch. Row has %d instead of the expected %d columns.",
                    len(columns),
                    column_count,
                )

            columns_previous = self._RE_PIPE_SPLIT.split(lines[line_idx - 1])[1:-1]
            # Join each cell of the previous row with the matching continuation
            # cell; surplus cells on either side are dropped by ``zip``.
            merged_cells = (
                " ".join(part.strip() for part in cell_parts).strip()
                for cell_parts in zip(columns_previous, columns, strict=False)
            )
            lines[line_idx - 1] = "| " + " | ".join(merged_cells) + " |"
            # The index is not advanced: the next line moves into this slot and
            # may be a further continuation of the same row.
            lines.pop(line_idx)

        return "\n".join(lines)

134 

135 

class MultilineTableExtension(TableExtension):
    """Python Markdown extension that installs the multiline table block processor."""

    def extendMarkdown(self, md: Markdown) -> None:
        """
        Register the multiline table processor on the given markdown instance.

        The characters ``|`` and ``:`` are appended to ``md.ESCAPED_CHARS`` when they
        are not listed yet, then a ``MultilineTableProcessor`` is registered under
        the name ``md-multiline-table`` with priority 106.

        Args:
            md (Markdown): markdown instance to extend
        """
        for delimiter in ("|", ":"):
            if delimiter not in md.ESCAPED_CHARS:
                md.ESCAPED_CHARS.append(delimiter)
        md.parser.blockprocessors.register(
            MultilineTableProcessor(md.parser, self.getConfigs()),
            "md-multiline-table",
            106,
        )

152 

153 

def make_extension(*args, **kwargs) -> MultilineTableExtension:
    """
    Build the extension object for this module.

    All positional and keyword arguments are forwarded unchanged to the
    `MultilineTableExtension` constructor.

    Returns:
        MultilineTableExtension: freshly created extension instance
    """
    extension = MultilineTableExtension(*args, **kwargs)
    return extension