Skip to content

DOMUtils

dcmspec.dom_utils.DOMUtils

Utility class for DOM navigation and extraction in DICOM XHTML documents.

Provides methods for locating tables and table IDs within a parsed BeautifulSoup DOM, with optional logging for warnings and debug information.

Typical usage

dom_utils = DOMUtils(logger=logger)
table = dom_utils.get_table(dom, table_id)
table_id = dom_utils.get_table_id_from_section(dom, section_id)

Source code in src/dcmspec/dom_utils.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
class DOMUtils:
    """Helper for walking a parsed DICOM XHTML document.

    Offers lookups for table elements and table identifiers in a BeautifulSoup
    DOM. Every failed lookup is reported as a warning on the instance logger
    and signalled to the caller by a None return value.

    Typical usage:
        dom_utils = DOMUtils(logger=logger)
        table = dom_utils.get_table(dom, table_id)
        table_id = dom_utils.get_table_id_from_section(dom, section_id)
    """

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Create a DOMUtils instance, optionally bound to a caller-supplied logger.

        Args:
            logger (Optional[logging.Logger]): Logger that receives the warning messages.
                When omitted, a logger named after the class is obtained from
                logging.getLogger.

        Raises:
            TypeError: If logger is neither None nor a logging.Logger instance.

        """
        is_acceptable = logger is None or isinstance(logger, logging.Logger)
        if not is_acceptable:
            raise TypeError("logger must be an instance of logging.Logger or None")
        if logger:
            self.logger = logger
        else:
            self.logger = logging.getLogger(self.__class__.__name__)

    def get_table(self, dom: BeautifulSoup, table_id: str) -> Optional[Tag]:
        """Look up the table element identified by table_id.

        The DocBook XML to XHTML conversion stylesheets wrap each table in a
        <div class="table"> that carries the table identifier in an
        <a id="table_ID"></a> anchor.

        The lookup locates the anchor with the requested id, climbs to its
        enclosing <div class="table">, and returns the first <table> in that div.
        A warning is logged for whichever step fails.

        Args:
            dom: The BeautifulSoup DOM object.
            table_id: The anchor id identifying the table.

        Returns:
            The table element, or None when any lookup step fails.

        """
        id_anchor = dom.find("a", {"id": table_id})
        if id_anchor is None:
            self.logger.warning(f"Table Id {table_id} not found.")
            return None

        wrapper_div = id_anchor.find_parent("div", class_="table")
        if wrapper_div:
            table_element = wrapper_div.find("table")
            if table_element:
                return table_element
            self.logger.warning(f"Table for Table Id {table_id} not found inside its <div class='table'>.")
            return None

        self.logger.warning(f"Parent <div class='table'> for Table Id {table_id} not found.")
        return None

    def get_table_id_from_section(self, dom: BeautifulSoup, section_id: str) -> Optional[str]:
        """Return the id of the first table found in a section.

        Starting from the <a> anchor carrying section_id, the lookup climbs to the
        enclosing <div class="section">, takes the first <div class="table"> inside
        it, and reads the id of the first anchor in that div that has one.
        A warning is logged for whichever step fails.

        Args:
            dom (BeautifulSoup): The parsed XHTML DOM object.
            section_id (str): The anchor id identifying the section to search.

        Returns:
            Optional[str]: The table anchor id, or None when any lookup step fails.

        """
        # Step 1: the anchor that marks the section.
        section_anchor = dom.find("a", {"id": section_id})
        if not section_anchor:
            self.logger.warning(f"Section with id '{section_id}' not found.")
            return None

        # Step 2: the section div enclosing that anchor.
        enclosing_section = section_anchor.find_parent("div", class_="section")
        if not enclosing_section:
            self.logger.warning(f"No parent <div class='section'> found for section id '{section_id}'.")
            return None

        # Step 3: the first table wrapper div within the section.
        first_table_div = enclosing_section.find("div", class_="table")
        if not first_table_div:
            self.logger.warning(f"No <div class='table'> found in section for section id '{section_id}'.")
            return None

        # Step 4: the first anchor with an id in the wrapper holds the table id.
        id_anchor = first_table_div.find("a", id=True)
        has_table_id = bool(id_anchor and id_anchor.get("id"))
        if not has_table_id:
            self.logger.warning(f"No table id found in <div class='table'> for section id '{section_id}'.")
            return None
        return id_anchor["id"]

__init__(logger=None)

Initialize DOMUtils with an optional logger.

PARAMETER DESCRIPTION
logger

Logger instance to use for warnings and debug messages. If None, a default logger is created.

TYPE: Optional[Logger] DEFAULT: None

Source code in src/dcmspec/dom_utils.py
28
29
30
31
32
33
34
35
36
37
38
def __init__(self, logger: Optional[logging.Logger] = None):
    """Create a DOMUtils instance, optionally bound to a caller-supplied logger.

    Args:
        logger (Optional[logging.Logger]): Logger that receives the warning messages.
            When omitted, a logger named after the class is obtained from
            logging.getLogger.

    Raises:
        TypeError: If logger is neither None nor a logging.Logger instance.

    """
    is_acceptable = logger is None or isinstance(logger, logging.Logger)
    if not is_acceptable:
        raise TypeError("logger must be an instance of logging.Logger or None")
    if logger:
        self.logger = logger
    else:
        self.logger = logging.getLogger(self.__class__.__name__)

get_table(dom, table_id)

Retrieve the table element with the specified ID from the DOM.

DocBook XML to XHTML conversion stylesheets enclose tables in a <div class="table"> with the table identifier in <a id="table_ID"></a>.

Searches for an anchor tag with the given ID and then finds the next table element.

PARAMETER DESCRIPTION
dom

The BeautifulSoup DOM object.

TYPE: BeautifulSoup

table_id

The ID of the table to retrieve.

TYPE: str

RETURNS DESCRIPTION
Optional[Tag]

The table element if found, otherwise None.

Source code in src/dcmspec/dom_utils.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def get_table(self, dom: BeautifulSoup, table_id: str) -> Optional[Tag]:
    """Look up the table element identified by table_id.

    The DocBook XML to XHTML conversion stylesheets wrap each table in a
    <div class="table"> that carries the table identifier in an
    <a id="table_ID"></a> anchor.

    The lookup locates the anchor with the requested id, climbs to its
    enclosing <div class="table">, and returns the first <table> in that div.
    A warning is logged for whichever step fails.

    Args:
        dom: The BeautifulSoup DOM object.
        table_id: The anchor id identifying the table.

    Returns:
        The table element, or None when any lookup step fails.

    """
    id_anchor = dom.find("a", {"id": table_id})
    if id_anchor is None:
        self.logger.warning(f"Table Id {table_id} not found.")
        return None

    wrapper_div = id_anchor.find_parent("div", class_="table")
    if wrapper_div:
        table_element = wrapper_div.find("table")
        if table_element:
            return table_element
        self.logger.warning(f"Table for Table Id {table_id} not found inside its <div class='table'>.")
        return None

    self.logger.warning(f"Parent <div class='table'> for Table Id {table_id} not found.")
    return None

get_table_id_from_section(dom, section_id)

Get the id of the first table in a section.

Retrieve the first table_id (anchor id) of a <div class="table"> inside a <div class="section"> that contains an <a> anchor with the given section id.

PARAMETER DESCRIPTION
dom

The parsed XHTML DOM object.

TYPE: BeautifulSoup

section_id

The id of the section to search for the table_id.

TYPE: str

RETURNS DESCRIPTION
Optional[str]

Optional[str]: The id of the first table anchor found, or None if not found.

Source code in src/dcmspec/dom_utils.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def get_table_id_from_section(self, dom: BeautifulSoup, section_id: str) -> Optional[str]:
    """Return the id of the first table found in a section.

    Starting from the <a> anchor carrying section_id, the lookup climbs to the
    enclosing <div class="section">, takes the first <div class="table"> inside
    it, and reads the id of the first anchor in that div that has one.
    A warning is logged for whichever step fails.

    Args:
        dom (BeautifulSoup): The parsed XHTML DOM object.
        section_id (str): The anchor id identifying the section to search.

    Returns:
        Optional[str]: The table anchor id, or None when any lookup step fails.

    """
    # Step 1: the anchor that marks the section.
    section_anchor = dom.find("a", {"id": section_id})
    if not section_anchor:
        self.logger.warning(f"Section with id '{section_id}' not found.")
        return None

    # Step 2: the section div enclosing that anchor.
    enclosing_section = section_anchor.find_parent("div", class_="section")
    if not enclosing_section:
        self.logger.warning(f"No parent <div class='section'> found for section id '{section_id}'.")
        return None

    # Step 3: the first table wrapper div within the section.
    first_table_div = enclosing_section.find("div", class_="table")
    if not first_table_div:
        self.logger.warning(f"No <div class='table'> found in section for section id '{section_id}'.")
        return None

    # Step 4: the first anchor with an id in the wrapper holds the table id.
    id_anchor = first_table_div.find("a", id=True)
    has_table_id = bool(id_anchor and id_anchor.get("id"))
    if not has_table_id:
        self.logger.warning(f"No table id found in <div class='table'> for section id '{section_id}'.")
        return None
    return id_anchor["id"]