Skip to content

DOMTableSpecParser

dcmspec.dom_table_spec_parser.DOMTableSpecParser

Bases: SpecParser

Parser for DICOM specification tables in XHTML DOM format.

Provides methods to extract, parse, and structure DICOM specification tables from XHTML documents, returning anytree Node objects as structured in-memory representations. Inherits logging from SpecParser.

Source code in src/dcmspec/dom_table_spec_parser.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
class DOMTableSpecParser(SpecParser):
    """Parser for DICOM specification tables in XHTML DOM format.

    Provides methods to extract, parse, and structure DICOM specification tables from XHTML documents,
    returning anytree Node objects as structured in-memory representations.
    Inherits logging from SpecParser.
    """

    def __init__(self, logger=None):
        """Initialize the DOMTableSpecParser.

        Sets up the parser with an optional logger and a DOMUtils instance for DOM navigation.

        Args:
            logger (Optional[logging.Logger]): Logger instance to use. If None, a default logger is created.

        """
        super().__init__(logger=logger)

        # DOMUtils centralizes table lookup by id within the parsed DOM.
        self.dom_utils = DOMUtils(logger=self.logger)

    def parse(
        self,
        dom: BeautifulSoup,
        table_id: str,
        column_to_attr: Dict[int, str],
        name_attr: str,
        include_depth: Optional[int] = None,  # None means unlimited
        skip_columns: Optional[list[int]] = None,
    ) -> tuple[Node, Node]:
        """Parse specification metadata and content from tables in the DOM.

        Parses tables within the DOM of a DICOM document and returns a tuple containing
        the metadata node and the table content node as structured in-memory representations.

        Args:
            dom (BeautifulSoup): The parsed XHTML DOM object.
            table_id (str): The ID of the table to parse.
            column_to_attr (Dict[int, str]): Mapping from column indices to attribute names for tree nodes.
            name_attr (str): The attribute name to use for building node names.
            include_depth (Optional[int], optional): The depth to which included tables should be parsed.
                None means unlimited.
            skip_columns (Optional[list[int]]): List of column indices to skip if the row is missing a column.

        Returns:
            Tuple[Node, Node]: The metadata node and the table content node.

        """
        # Set by _align_row_with_skipped_columns when at least one row was re-aligned.
        self._skipped_columns_flag = False

        content = self.parse_table(
            dom, table_id, column_to_attr, name_attr, include_depth=include_depth, skip_columns=skip_columns
        )

        # If we ever skipped columns, remove them from metadata.column_to_attr and realign
        # the keys to consecutive indices so the mapping matches the parsed columns.
        if skip_columns and self._skipped_columns_flag:
            kept_items = [(k, v) for k, v in column_to_attr.items() if k not in skip_columns]
            filtered_column_to_attr = {i: v for i, (k, v) in enumerate(kept_items)}
        else:
            filtered_column_to_attr = column_to_attr

        metadata = self.parse_metadata(dom, table_id, filtered_column_to_attr)
        metadata.column_to_attr = filtered_column_to_attr
        metadata.table_id = table_id
        if include_depth is not None:
            metadata.include_depth = int(include_depth)
        return metadata, content

    def parse_table(
        self,
        dom: BeautifulSoup,
        table_id: str,
        column_to_attr: Dict[int, str],
        name_attr: str,
        table_nesting_level: int = 0,
        include_depth: Optional[int] = None,  # None means unlimited
        skip_columns: Optional[list[int]] = None,
    ) -> Node:
        """Parse specification content from tables within the DOM of a DICOM document.

        This method extracts data from each row of the table, handles nested
        tables indicated by "Include" links, and builds a tree-like structure
        of the DICOM attributes which root node is assigned to the attribute
        model.

        Args:
            dom: The BeautifulSoup DOM object.
            table_id: The ID of the table to parse.
            column_to_attr: Mapping between index of columns to parse and tree nodes attributes names
            name_attr: tree node attribute name to use to build node name
            table_nesting_level: The nesting level of the table (used for recursion call only).
            include_depth: The depth to which included tables should be parsed.
            skip_columns (Optional[list[int]]): List of column indices to skip if the row is missing a column.

        Returns:
            root: The root node of the tree representation of the specification table.

        """
        self.logger.info(f"Nesting Level: {table_nesting_level}, Parsing table with id {table_id}")

        table = self.dom_utils.get_table(dom, table_id)
        if not table:
            raise ValueError(f"Table with id '{table_id}' not found.")

        if not column_to_attr:
            raise ValueError("Columns to node attributes missing.")
        # Maps column indices in the DICOM standard table to corresponding node attribute
        # names for constructing a tree-like representation of the table's data.
        self.column_to_attr = column_to_attr

        root = Node("content")
        level_nodes: Dict[int, Node] = {0: root}

        # The first <tr> is the header row; only data rows are parsed.
        for row in table.find_all("tr")[1:]:
            row_data = self._extract_row_data(row, skip_columns=skip_columns)
            # Use .get(): when the skip_columns alignment path dropped the name column
            # for this row the key may be absent entirely; treat that like an empty row
            # instead of raising KeyError.
            if row_data.get(name_attr) is None:
                continue  # Skip empty rows
            # Leading ">" characters in the name encode the row's nesting depth.
            row_nesting_level = table_nesting_level + row_data[name_attr].count(">")

            # Add nesting level symbols to included table element names except if row is a title
            if table_nesting_level > 0 and not row_data[name_attr].isupper():
                row_data[name_attr] = ">" * table_nesting_level + row_data[name_attr]

            # Process Include statement unless include_depth is defined and not reached
            if "Include" in row_data[name_attr] and (include_depth is None or include_depth > 0):
                next_depth = None if include_depth is None else include_depth - 1
                self._parse_included_table(
                    dom, row, column_to_attr, name_attr, row_nesting_level, next_depth, level_nodes, root
                )
            else:
                node_name = self._sanitize_string(row_data[name_attr])
                self._create_node(node_name, row_data, row_nesting_level, level_nodes, root)

        self.logger.info(f"Nesting Level: {table_nesting_level}, Table parsed successfully")
        return root

    def parse_metadata(
        self,
        dom: BeautifulSoup,
        table_id: str,
        column_to_attr: Dict[int, str],
    ) -> Node:
        """Parse specification metadata from the document and the table within the DOM of a DICOM document.

        This method extracts the version of the DICOM standard and the headers of the tables.

        Args:
            dom: The BeautifulSoup DOM object.
            table_id: The ID of the table to parse.
            column_to_attr: Mapping between index of columns to parse and attributes name.

        Returns:
            metadata_node: The root node of the tree representation of the specification metadata.

        """
        table = self.dom_utils.get_table(dom, table_id)
        if not table:
            raise ValueError(f"Table with id '{table_id}' not found.")

        metadata = Node("metadata")
        # Parse the DICOM Standard document information
        version = self.get_version(dom, table_id)
        metadata.version = version
        # Parse the Attribute table header
        header = self._extract_header(table, column_to_attr=column_to_attr)
        metadata.header = header

        return metadata

    def get_version(self, dom: BeautifulSoup, table_id: str) -> str:
        """Retrieve the DICOM Standard version from the DOM.

        Args:
            dom: The BeautifulSoup DOM object.
            table_id: The ID of the table to retrieve (not used by this method; kept
                for interface consistency).

        Returns:
            str: The DICOM Standard version string, or "" if it cannot be found.

        """
        # Try the book title page first, then fall back to the section release info.
        version = self._version_from_book(dom) or self._version_from_section(dom)
        if not version:
            version = ""
            self.logger.warning("DICOM Standard version not found")
        return version

    def _version_from_book(self, dom):
        """Extract version of DICOM books in HTML format.

        Returns None when the title page or its subtitle is absent.
        """
        titlepage = dom.find("div", class_="titlepage")
        # Guard both lookups: previously `subtitle` was referenced before assignment
        # (UnboundLocalError) whenever no <div class="titlepage"> was present.
        subtitle = titlepage.find("h2", class_="subtitle") if titlepage else None
        return subtitle.text.split()[2] if subtitle else None

    def _version_from_section(self, dom):
        """Extract version of DICOM sections in the CHTML format."""
        document_release = dom.find("span", class_="documentreleaseinformation")
        return document_release.text.split()[2] if document_release else None

    def _extract_row_data(
            self, row: Tag, skip_columns: Optional[list[int]] = None) -> Dict[str, Any]:
        """Extract data from a table row.

        Processes each cell in the row, handling colspans and extracting text
        content from paragraphs within the cells. Constructs a dictionary
        containing the extracted data.

        If the row has one less cell than the mapping and skip_columns is set,
        those columns will be skipped for this row, allowing for robust alignment when
        a column is sometimes missing.

        Args:
            row: The table row element (BeautifulSoup Tag).
            skip_columns (Optional[list[int]]): List of column indices to skip if the row is missing a column.

        Returns:
            A dictionary containing the extracted data from the row.

        """
        # Initialize rowspan trackers if not present.
        # NOTE(review): trackers deliberately persist across rows of one table, but they
        # are never reset between parse_table calls (including recursive includes) —
        # confirm a pending rowspan cannot bleed into a subsequently parsed table.
        if not hasattr(self, "_rowspan_trackers") or self._rowspan_trackers is None:
            self._rowspan_trackers = []

        # Add cells from pending rowspans
        cells, colspans, rowspans, col_idx = self._handle_pending_rowspans()

        # Process the actual cells in this row
        col_idx = self._process_actual_cells(row, cells, colspans, rowspans, col_idx)

        # Clean up rowspan trackers for cells that are no longer needed
        if len(self._rowspan_trackers) > col_idx:
            self._rowspan_trackers = self._rowspan_trackers[:col_idx]

        attr_indices = list(self.column_to_attr.keys())

        return (
            self._align_row_with_skipped_columns(
                cells, colspans, attr_indices, skip_columns
            )
            if skip_columns
            and len(cells) == len(self.column_to_attr) - len(skip_columns)
            else self._align_row_default(cells, colspans, attr_indices)
        )

    def _align_row_with_skipped_columns(
        self, cells, colspans, attr_indices, skip_columns
    ):
        """Align cells to attributes when skip_columns is used.

        This method aligns the row's cells to the attribute indices, skipping the columns
        specified in skip_columns. It is used when the row is missing exactly the number of
        columns specified, ensuring the remaining cells are mapped to the correct attributes.

        """
        kept_indices = [i for i in attr_indices if i not in skip_columns]
        # Flag that the skipped columns were actually skipped for at least one row.
        self._skipped_columns_flag = True
        # zip truncates to the shorter sequence, preserving the original bounds check;
        # colspans are intentionally ignored on this path.
        return {
            self.column_to_attr[col_key]: cell
            for cell, col_key in zip(cells, kept_indices)
        }

    def _align_row_default(self, cells, colspans, attr_indices):
        """Align cells to attributes by default, handling colspans and missing cells.

        Always set all attributes, even if missing in this row, filling spanned columns with None
        to maintain alignment with the column_to_attr mapping.

        """
        row_data = {}
        cell_idx = 0
        attr_indices = sorted(attr_indices)
        i = 0
        while i < len(attr_indices):
            attr = self.column_to_attr[attr_indices[i]]
            if cell_idx < len(cells):
                row_data[attr] = cells[cell_idx]
                colsp = colspans[cell_idx] if cell_idx < len(colspans) else 1
                # Fill in None for skipped columns due to colspan
                for _ in range(1, colsp):
                    i += 1
                    if i < len(attr_indices):
                        skipped_attr = self.column_to_attr[attr_indices[i]]
                        row_data[skipped_attr] = None
                cell_idx += 1
            else:
                row_data[attr] = None
            i += 1
        return row_data

    def _handle_pending_rowspans(self):
        """Prepend values carried over from earlier rows' rowspans and decrement their counters."""
        cells = []
        colspans = []
        rowspans = []
        col_idx = 0
        for tracker in self._rowspan_trackers:
            if tracker and tracker["rows_left"] > 0:
                cells.append(tracker["value"])
                colspans.append(tracker["colspan"])
                rowspans.append(tracker["rows_left"])
                tracker["rows_left"] -= 1
                col_idx += tracker["colspan"]
        return cells, colspans, rowspans, col_idx

    def _process_actual_cells(self, row, cells, colspans, rowspans, col_idx):
        """Append text/colspan/rowspan of each <td> in the row, registering rowspan trackers."""
        cell_iter = iter(row.find_all("td"))
        while True:
            if col_idx >= len(self._rowspan_trackers):
                self._rowspan_trackers.append(None)
            if self._rowspan_trackers[col_idx] and self._rowspan_trackers[col_idx]["rows_left"] > 0:
                # Already filled by rowspan above
                col_idx += self._rowspan_trackers[col_idx]["colspan"]
                continue
            try:
                cell = next(cell_iter)
            except StopIteration:
                break
            paragraphs = cell.find_all("p")
            if paragraphs:
                cell_text = "\n".join(p.text.strip() for p in paragraphs)
            else:
                # Handle cases where no <p> tags present
                cell_text = cell.get_text(strip=True)
            colspan = int(cell.get("colspan", 1))
            rowspan = int(cell.get("rowspan", 1))
            cells.append(cell_text)
            colspans.append(colspan)
            rowspans.append(rowspan)

            for i in range(colspan):
                while len(self._rowspan_trackers) <= col_idx + i:
                    self._rowspan_trackers.append(None)
                # If rowspan > 1, track for future rows
                if rowspan > 1:
                    self._rowspan_trackers[col_idx + i] = {
                        "value": cell_text,
                        "rows_left": rowspan - 1,
                        "colspan": 1,
                    }
                else:
                    self._rowspan_trackers[col_idx + i] = None
            col_idx += colspan
        return col_idx

    def _parse_included_table(
        self,
        dom: BeautifulSoup,
        row: Tag,
        column_to_attr: Dict[int, str],
        name_attr: str,
        table_nesting_level: int,
        include_depth: int,
        level_nodes: Dict[int, Node],
        root: Node,
    ) -> None:
        """Recursively parse Included Table."""
        include_anchor = row.find("a", {"class": "xref"})
        if not include_anchor:
            self.logger.warning(f"Nesting Level: {table_nesting_level}, Include Table Id not found")
            return

        # The xref's href fragment ("...#table_X") is the included table's id.
        include_table_id = include_anchor["href"].split("#", 1)[-1]
        self.logger.debug(f"Nesting Level: {table_nesting_level}, Include Table Id: {include_table_id}")

        # NOTE(review): skip_columns is intentionally not propagated to included
        # tables here — confirm included tables always have the full column set.
        included_table_tree = self.parse_table(
            dom,
            include_table_id,
            column_to_attr=column_to_attr,
            name_attr=name_attr,
            table_nesting_level=table_nesting_level,
            include_depth=include_depth,
        )
        if not included_table_tree:
            return

        self._nest_included_table(included_table_tree, level_nodes, table_nesting_level, root)

    def _nest_included_table(
        self, included_table_tree: Node, level_nodes: Dict[int, Node], row_nesting_level: int, root: Node
    ) -> None:
        """Graft the included table's children under the node one level above the Include row."""
        parent_node = level_nodes.get(row_nesting_level - 1, root)
        for child in included_table_tree.children:
            child.parent = parent_node

    def _create_node(
        self, node_name: str, row_data: Dict[str, Any], row_nesting_level: int, level_nodes: Dict[int, Node], root: Node
    ) -> None:
        """Create a tree node for the row under the parent at the previous nesting level."""
        parent_node = level_nodes.get(row_nesting_level - 1, root)
        self.logger.debug(
            f"Nesting Level: {row_nesting_level}, Name: {node_name}, "
            f"Parent: {parent_node.name if parent_node else 'None'}"
        )
        node = Node(node_name, parent=parent_node, **row_data)
        level_nodes[row_nesting_level] = node

    def _extract_header(self, table: Tag, column_to_attr: Dict[int, str]) -> list:
        """Extract headers from the table and saves them in the headers attribute.

        Realign the keys in column_to_attr to consecutive indices if the number of columns in the table
        is less than the maximum key in column_to_attr, to handle cases where the mapping is out of sync
        with the actual table structure.

        Args:
            table: The table element from which to extract headers.
            column_to_attr: Mapping between index of columns to parse and attributes name.

        Returns:
            list: The header cell texts, in mapping order.

        """
        cells = table.find_all("th")
        num_columns = len(cells)
        # If the mapping has non-consecutive keys and the table has fewer columns, realign
        if max(column_to_attr.keys()) >= num_columns:
            # Map consecutive indices to the same attribute names, skipping as needed
            sorted_attrs = [column_to_attr[k] for k in sorted(column_to_attr.keys())]
            realigned_col_to_attr = dict(enumerate(sorted_attrs))
            column_to_attr = realigned_col_to_attr

        header = []
        header.extend(
            cells[col_idx].get_text(strip=True)
            for col_idx in column_to_attr
            if col_idx < len(cells)
        )
        self.logger.info(f"Extracted Header: {header}")
        return header

    def _sanitize_string(self, input_string: str) -> str:
        """Sanitize string to use it as a node attribute name.

        - Lowercase and transliterate non-ASCII characters to closest ASCII equivalents
        - Replace parentheses with dashes
        - Replace spaces, dashes, and apostrophes with underscores

        Args:
            input_string (str): The string to be sanitized.

        Returns:
            str: The sanitized string.

        """
        # Normalize the string to NFC form and transliterate to ASCII
        normalized_str = unidecode(input_string.lower())
        return re.sub(
            r"[ \-()']",
            lambda match: "-" if match.group(0) in "()" else "_",
            normalized_str,
        )

__init__(logger=None)

Initialize the DOMTableSpecParser.

Sets up the parser with an optional logger and a DOMUtils instance for DOM navigation.

PARAMETER DESCRIPTION
logger

Logger instance to use. If None, a default logger is created.

TYPE: Optional[Logger] DEFAULT: None

Source code in src/dcmspec/dom_table_spec_parser.py
22
23
24
25
26
27
28
29
30
31
32
33
def __init__(self, logger=None):
    """Initialize the DOMTableSpecParser.

    Sets up the parser with an optional logger and a DOMUtils instance for DOM navigation.

    Args:
        logger (Optional[logging.Logger]): Logger instance to use. If None, a default logger is created.

    """
    super().__init__(logger=logger)

    # DOMUtils centralizes table lookup by id within the parsed DOM.
    self.dom_utils = DOMUtils(logger=self.logger)

get_version(dom, table_id)

Retrieve the DICOM Standard version from the DOM.

PARAMETER DESCRIPTION
dom

The BeautifulSoup DOM object.

TYPE: BeautifulSoup

table_id

The ID of the table to retrieve.

TYPE: str

RETURNS DESCRIPTION
version

The DICOM Standard version string, or "" if it cannot be found.

TYPE: str

Source code in src/dcmspec/dom_table_spec_parser.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
def get_version(self, dom: BeautifulSoup, table_id: str) -> str:
    """Retrieve the DICOM Standard version from the DOM.

    Args:
        dom: The BeautifulSoup DOM object.
        table_id: The ID of the table to retrieve (not used by this method; kept
            for interface consistency).

    Returns:
        str: The DICOM Standard version string, or "" if it cannot be found.

    """
    # Try the book title page first, then fall back to the section release info.
    version = self._version_from_book(dom) or self._version_from_section(dom)
    if not version:
        version = ""
        self.logger.warning("DICOM Standard version not found")
    return version

parse(dom, table_id, column_to_attr, name_attr, include_depth=None, skip_columns=None)

Parse specification metadata and content from tables in the DOM.

Parses tables within the DOM of a DICOM document and returns a tuple containing the metadata node and the table content node as structured in-memory representations.

PARAMETER DESCRIPTION
dom

The parsed XHTML DOM object.

TYPE: BeautifulSoup

table_id

The ID of the table to parse.

TYPE: str

column_to_attr

Mapping from column indices to attribute names for tree nodes.

TYPE: Dict[int, str]

name_attr

The attribute name to use for building node names.

TYPE: str

include_depth

The depth to which included tables should be parsed. None means unlimited.

TYPE: Optional[int] DEFAULT: None

skip_columns

List of column indices to skip if the row is missing a column.

TYPE: Optional[list[int]] DEFAULT: None

RETURNS DESCRIPTION
tuple[Node, Node]

Tuple[Node, Node]: The metadata node and the table content node.

Source code in src/dcmspec/dom_table_spec_parser.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def parse(
    self,
    dom: BeautifulSoup,
    table_id: str,
    column_to_attr: Dict[int, str],
    name_attr: str,
    include_depth: Optional[int] = None,  # None means unlimited
    skip_columns: Optional[list[int]] = None,
) -> tuple[Node, Node]:
    """Parse specification metadata and content from tables in the DOM.

    Parses tables within the DOM of a DICOM document and returns a tuple containing
    the metadata node and the table content node as structured in-memory representations.

    Args:
        dom (BeautifulSoup): The parsed XHTML DOM object.
        table_id (str): The ID of the table to parse.
        column_to_attr (Dict[int, str]): Mapping from column indices to attribute names for tree nodes.
        name_attr (str): The attribute name to use for building node names.
        include_depth (Optional[int], optional): The depth to which included tables should be parsed.
            None means unlimited.
        skip_columns (Optional[list[int]]): List of column indices to skip if the row is missing a column.

    Returns:
        Tuple[Node, Node]: The metadata node and the table content node.

    """
    # Set by the row-alignment helper when at least one row was re-aligned.
    self._skipped_columns_flag = False

    content = self.parse_table(
        dom, table_id, column_to_attr, name_attr, include_depth=include_depth, skip_columns=skip_columns
    )

    # If we ever skipped columns, remove them from metadata.column_to_attr and realign keys
    # to consecutive indices so the mapping matches the actually-parsed columns.
    if skip_columns and self._skipped_columns_flag:
        kept_items = [(k, v) for k, v in column_to_attr.items() if k not in skip_columns]
        filtered_column_to_attr = {i: v for i, (k, v) in enumerate(kept_items)}
    else:
        filtered_column_to_attr = column_to_attr

    metadata = self.parse_metadata(dom, table_id, filtered_column_to_attr)
    metadata.column_to_attr = filtered_column_to_attr
    metadata.table_id = table_id
    if include_depth is not None:
        metadata.include_depth = int(include_depth)
    return metadata, content

parse_metadata(dom, table_id, column_to_attr)

Parse specification metadata from the document and the table within the DOM of a DICOM document.

This method extracts the version of the DICOM standard and the headers of the tables.

PARAMETER DESCRIPTION
dom

The BeautifulSoup DOM object.

TYPE: BeautifulSoup

table_id

The ID of the table to parse.

TYPE: str

column_to_attr

Mapping between index of columns to parse and attributes name.

TYPE: Dict[int, str]

RETURNS DESCRIPTION
metadata_node

The root node of the tree representation of the specification metadata.

TYPE: Node

Source code in src/dcmspec/dom_table_spec_parser.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def parse_metadata(
    self,
    dom: BeautifulSoup,
    table_id: str,
    column_to_attr: Dict[int, str],
) -> Node:
    """Parse specification metadata from the document and the table within the DOM of a DICOM document.

    This method extracts the version of the DICOM standard and the headers of the tables.

    Args:
        dom: The BeautifulSoup DOM object.
        table_id: The ID of the table to parse.
        column_to_attr: Mapping between index of columns to parse and attributes name.

    Returns:
        metadata_node: The root node of the tree representation of the specification metadata.

    Raises:
        ValueError: If no table with the given id exists in the DOM.

    """
    # Fail fast when the target table is absent.
    table = self.dom_utils.get_table(dom, table_id)
    if not table:
        raise ValueError(f"Table with id '{table_id}' not found.")

    metadata = Node("metadata")
    # Parse the DICOM Standard document information
    version = self.get_version(dom, table_id)
    metadata.version = version
    # Parse the Attribute table header
    header = self._extract_header(table, column_to_attr=column_to_attr)
    metadata.header = header

    return metadata

parse_table(dom, table_id, column_to_attr, name_attr, table_nesting_level=0, include_depth=None, skip_columns=None)

Parse specification content from tables within the DOM of a DICOM document.

This method extracts data from each row of the table, handles nested tables indicated by "Include" links, and builds a tree-like structure of the DICOM attributes which root node is assigned to the attribute model.

PARAMETER DESCRIPTION
dom

The BeautifulSoup DOM object.

TYPE: BeautifulSoup

table_id

The ID of the table to parse.

TYPE: str

column_to_attr

Mapping between index of columns to parse and tree nodes attributes names

TYPE: Dict[int, str]

name_attr

tree node attribute name to use to build node name

TYPE: str

table_nesting_level

The nesting level of the table (used for recursion call only).

TYPE: int DEFAULT: 0

include_depth

The depth to which included tables should be parsed.

TYPE: Optional[int] DEFAULT: None

skip_columns

List of column indices to skip if the row is missing a column.

TYPE: Optional[list[int]] DEFAULT: None

RETURNS DESCRIPTION
root

The root node of the tree representation of the specification table.

TYPE: Node

Source code in src/dcmspec/dom_table_spec_parser.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def parse_table(
    self,
    dom: BeautifulSoup,
    table_id: str,
    column_to_attr: Dict[int, str],
    name_attr: str,
    table_nesting_level: int = 0,
    include_depth: Optional[int] = None,  # None means unlimited
    skip_columns: Optional[list[int]] = None,
) -> Node:
    """Parse specification content from tables within the DOM of a DICOM document.

    This method extracts data from each row of the table, handles nested
    tables indicated by "Include" links, and builds a tree-like structure
    of the DICOM attributes which root node is assigned to the attribute
    model.

    Args:
        dom: The BeautifulSoup DOM object.
        table_id: The ID of the table to parse.
        column_to_attr: Mapping between index of columns to parse and tree nodes attributes names
        name_attr: tree node attribute name to use to build node name
        table_nesting_level: The nesting level of the table (used for recursion call only).
        include_depth: The depth to which included tables should be parsed.
        skip_columns (Optional[list[int]]): List of column indices to skip if the row is missing a column.

    Returns:
        root: The root node of the tree representation of the specification table.

    Raises:
        ValueError: If the table is not found or column_to_attr is empty.

    """
    self.logger.info(f"Nesting Level: {table_nesting_level}, Parsing table with id {table_id}")

    table = self.dom_utils.get_table(dom, table_id)
    if not table:
        raise ValueError(f"Table with id '{table_id}' not found.")

    if not column_to_attr:
        raise ValueError("Columns to node attributes missing.")
    else:
        # Maps column indices in the DICOM standard table to corresponding node attribute
        # names for constructing a tree-like representation of the table's data.
        self.column_to_attr = column_to_attr

    root = Node("content")
    level_nodes: Dict[int, Node] = {0: root}

    # The first <tr> is the header row; only data rows are parsed.
    for row in table.find_all("tr")[1:]:
        row_data = self._extract_row_data(row, skip_columns=skip_columns)
        # NOTE(review): this indexes row_data[name_attr] directly — confirm name_attr
        # can never be among skip_columns, or this would raise KeyError.
        if row_data[name_attr] is None:
            continue  # Skip empty rows
        # Leading ">" characters in the name encode the row's nesting depth.
        row_nesting_level = table_nesting_level + row_data[name_attr].count(">")

        # Add nesting level symbols to included table element names except if row is a title
        if table_nesting_level > 0 and not row_data[name_attr].isupper():
            row_data[name_attr] = ">" * table_nesting_level + row_data[name_attr]

        # Process Include statement unless include_depth is defined and not reached
        if "Include" in row_data[name_attr] and (include_depth is None or include_depth > 0):
            next_depth = None if include_depth is None else include_depth - 1
            self._parse_included_table(
                dom, row, column_to_attr, name_attr, row_nesting_level, next_depth, level_nodes, root
            )
        else:
            node_name = self._sanitize_string(row_data[name_attr])
            self._create_node(node_name, row_data, row_nesting_level, level_nodes, root)

    self.logger.info(f"Nesting Level: {table_nesting_level}, Table parsed successfully")
    return root