Skip to content

UPSXHTMLDocHandler

dcmspec.ups_xhtml_doc_handler.UPSXHTMLDocHandler

Bases: XHTMLDocHandler

Subclass of XHTMLDocHandler that applies UPS-specific table patching.

This handler applies UPS-specific patching to DICOM XHTML tables after parsing. It corrects known issues in Table CC.2.5-3 of DICOM PS3.4, where 'Include' rows under certain sequence attribute rows are missing a '>' nesting symbol. The affected sequences are: Output Information Sequence, Gender Identity Code Sequence, Sex Parameters for Clinical Use Category Code Sequence, and Pronoun Code Sequence.

Source code in src/dcmspec/ups_xhtml_doc_handler.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
class UPSXHTMLDocHandler(XHTMLDocHandler):
    """Subclass of XHTMLDocHandler that applies UPS-specific table patching.

    This handler applies UPS-specific patching to DICOM XHTML tables after parsing.
    It corrects known issues in Table CC.2.5-3 of DICOM PS3.4, where 'Include' rows under certain
    sequence attribute rows are missing a '>' nesting symbol. The affected sequences are:
        - Output Information Sequence
        - Gender Identity Code Sequence
        - Sex Parameters for Clinical Use Category Code Sequence
        - Pronoun Code Sequence
    """

    # First-cell text of the sequence rows whose following 'Include' row is patched.
    _PATCH_LABELS = (
        ">Output Information Sequence",
        ">Gender Identity Code Sequence",
        ">Sex Parameters for Clinical Use Category Code Sequence",
        ">Pronoun Code Sequence",
    )

    def __init__(self, config=None, logger=None):
        """Initialize the UPSXHTMLDocHandler.

        Sets up the handler with the given configuration and logger, and creates a DOMUtils
        instance for DOM navigation.

        Args:
            config (optional): Configuration object for the handler.
            logger (optional): Logger instance to use. If None, a default logger is created.

        """
        super().__init__(config=config, logger=logger)
        self.dom_utils = DOMUtils(logger=self.logger)

    def parse_dom(self, file_path: str) -> BeautifulSoup:
        """Parse a cached XHTML file and apply UPS-specific table patching.

        Calls the base class's parse_dom, then patches the nesting level of the 'Include'
        rows that follow the affected sequence rows in table "table_CC.2.5-3".

        Args:
            file_path (str): Path to the cached XHTML file to parse.

        Returns:
            BeautifulSoup: The patched DOM object.

        """
        dom = super().parse_dom(file_path)
        # Patch the table after parsing
        self._patch_table(dom, "table_CC.2.5-3")  # or pass table_id dynamically if needed
        return dom

    def _patch_table(self, dom, table_id):
        """Patch the specified XHTML table to fix Include nesting level errors.

        In the UPS, the 'Include' row under some sequence attribute rows are missing one '>' nesting symbol.
        For each affected sequence, the text ">Include" inside the row's italic span is replaced
        by ">>Include". Sequences whose Include row cannot be located are skipped with a warning.

        Args:
            dom: The BeautifulSoup DOM object representing the XHTML document.
            table_id: The ID of the table to patch.

        """
        for label in self._PATCH_LABELS:
            target_element_id = self._search_element_id(dom, table_id, label)
            if not target_element_id:
                self.logger.warning(f"{label} Include Row element ID not found")
                continue

            # Guard both lookups: either may yield None, and chaining them blindly
            # would raise AttributeError instead of skipping this label.
            anchor = dom.find(id=target_element_id)
            element = anchor.find_parent() if anchor is not None else None
            if element is None:
                self.logger.warning(f"{label} Include Row element not found in DOM")
                continue

            span_element = element.find("span", class_="italic")
            if span_element is None:
                continue

            # Snapshot the matching text nodes first: replace_with() mutates the span's
            # children, which must not happen while iterating over them.
            children_to_modify = [
                child for child in span_element.children
                if isinstance(child, str) and ">Include" in child
            ]
            for child in children_to_modify:
                child.replace_with(child.replace(">Include", ">>Include"))

    def _search_element_id(self, dom, table_id, sequence_label):
        """Return the anchor id of the Include row following a sequence row in a table.

        Args:
            dom: The BeautifulSoup DOM object representing the XHTML document.
            table_id: The ID of the table to search.
            sequence_label: Exact text of the first cell of the sequence row.

        Returns:
            The id of the Include row's anchor, or None if the table or the row is not found.

        """
        table = self.dom_utils.get_table(dom, table_id)
        if not table:
            return None

        self.logger.debug(f"Table with id {table_id} found")
        tr_elements = table.find_all("tr")
        include_id = self._search_sequence_include_id(tr_elements, sequence_label)

        if include_id is None:
            self.logger.debug("No <tr> matching criteria found")

        return include_id

    def _search_sequence_include_id(self, tr_elements, sequence_label):
        """Find the sequence row, then return the anchor id of the Include row right after it.

        Args:
            tr_elements: Iterable of <tr> elements to scan.
            sequence_label: Exact (whitespace-stripped) text of the sequence row's first cell.

        Returns:
            The ``id`` of the first <a> in the Include row's first cell, or None when the sequence
            row is absent, the next row's first cell does not start with ">Include", or that cell
            has no <a> element carrying an id.

        """
        sequence_row = None
        for tr in tr_elements:
            first_td = tr.find("td")
            if first_td is not None and first_td.get_text(strip=True) == sequence_label:
                self.logger.debug(f"{sequence_label} row found")
                sequence_row = tr
                break

        if sequence_row is None:
            return None

        include_row = sequence_row.find_next("tr")
        if include_row is None:
            return None

        first_td = include_row.find("td")
        if first_td is None or not first_td.get_text(strip=True).startswith(">Include"):
            return None

        self.logger.debug("Include <tr> found")
        # The cell may lack an anchor, or the anchor may lack an id; subscripting
        # directly would raise TypeError/KeyError, so report "not found" instead.
        anchor = first_td.find("a")
        anchor_id = anchor.get("id") if anchor is not None else None
        if not anchor_id:
            self.logger.debug("Include <tr> has no <a> element with an id")
            return None
        return anchor_id

__init__(config=None, logger=None)

Initialize the UPSXHTMLDocHandler.

Sets up the handler with the given configuration and logger, and creates a DOMUtils instance for DOM navigation.

PARAMETER DESCRIPTION
config

Configuration object for the handler.

TYPE: optional DEFAULT: None

logger

Logger instance to use. If None, a default logger is created.

TYPE: optional DEFAULT: None

Source code in src/dcmspec/ups_xhtml_doc_handler.py
23
24
25
26
27
28
29
30
31
32
33
34
35
def __init__(self, config=None, logger=None):
    """Create the handler and attach a DOMUtils helper.

    Both arguments are forwarded unchanged to the base class initializer. A DOMUtils
    instance built with the handler's logger is then stored on ``self.dom_utils``.

    Args:
        config (optional): Configuration object forwarded to the base class.
        logger (optional): Logger instance forwarded to the base class.

    """
    super().__init__(config=config, logger=logger)
    # Built after the base initializer so that self.logger is already set.
    navigator = DOMUtils(logger=self.logger)
    self.dom_utils = navigator

parse_dom(file_path)

Parse a cached XHTML file and apply UPS-specific table patching.

Calls the base class's parse_dom, then patches the nesting level of the affected 'Include' rows in Table CC.2.5-3.

PARAMETER DESCRIPTION
file_path

Path to the cached XHTML file to parse.

TYPE: str

RETURNS DESCRIPTION
BeautifulSoup

The patched DOM object.

TYPE: BeautifulSoup

Source code in src/dcmspec/ups_xhtml_doc_handler.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def parse_dom(self, file_path: str) -> BeautifulSoup:
    """Parse a cached XHTML file, then patch the UPS table in the resulting DOM.

    The file is parsed by the base class's parse_dom; the table with id
    "table_CC.2.5-3" is then patched in place via ``_patch_table``.

    Args:
        file_path (str): Path to the cached XHTML file to parse.

    Returns:
        BeautifulSoup: The DOM object after patching.

    """
    patched_dom = super().parse_dom(file_path)
    # The table id is fixed here; _patch_table mutates the DOM it is given.
    self._patch_table(patched_dom, "table_CC.2.5-3")
    return patched_dom