Bases: XHTMLDocHandler
Subclass of XHTMLDocHandler that applies UPS-specific table patching.
This handler applies UPS-specific patching to DICOM XHTML tables after parsing.
It corrects known issues in Table CC.2.5-3 of DICOM PS3.4, where 'Include' rows under certain
sequence attribute rows are missing a '>' nesting symbol. The affected sequences are:
- Output Information Sequence
- Gender Identity Code Sequence
- Sex Parameters for Clinical Use Category Code Sequence
- Pronoun Code Sequence
Source code in src/dcmspec/ups_xhtml_doc_handler.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
class UPSXHTMLDocHandler(XHTMLDocHandler):
    """Subclass of XHTMLDocHandler that applies UPS-specific table patching.

    This handler applies UPS-specific patching to DICOM XHTML tables after parsing.
    It corrects known issues in Table CC.2.5-3 of DICOM PS3.4, where 'Include' rows under certain
    sequence attribute rows are missing a '>' nesting symbol. The affected sequences are:
    - Output Information Sequence
    - Gender Identity Code Sequence
    - Sex Parameters for Clinical Use Category Code Sequence
    - Pronoun Code Sequence
    """

    def __init__(self, config=None, logger=None):
        """Initialize the UPSXHTMLDocHandler.

        Sets up the handler with the given configuration and logger, and creates a DOMUtils
        instance for DOM navigation.

        Args:
            config (optional): Configuration object for the handler.
            logger (optional): Logger instance to use. If None, a default logger is created.

        """
        super().__init__(config=config, logger=logger)
        # DOMUtils shares this handler's logger; self.logger is read only after
        # the base initializer has run.
        self.dom_utils = DOMUtils(logger=self.logger)

    def parse_dom(self, file_path: str) -> BeautifulSoup:
        """Parse a cached XHTML file and apply UPS-specific table patching.

        Calls the base class's parse_dom, then patches the nesting level of the
        'Include' rows that follow the affected sequence attribute rows in
        Table CC.2.5-3 (see `_patch_table` for the list of sequences).

        Args:
            file_path (str): Path to the cached XHTML file to parse.

        Returns:
            BeautifulSoup: The patched DOM object.

        """
        dom = super().parse_dom(file_path)
        # Patch the table after parsing
        self._patch_table(dom, "table_CC.2.5-3")  # or pass table_id dynamically if needed
        return dom

    def _patch_table(self, dom, table_id):
        """Patch the specified XHTML table to fix Include nesting level errors.

        In the UPS, the 'Include' rows under some sequence attribute rows are missing
        one '>' nesting symbol. For each affected sequence, the Include row directly
        following it is located and '>Include' is rewritten to '>>Include' in place.
        A warning is logged for every sequence whose Include row cannot be located.

        Args:
            dom: The BeautifulSoup DOM object representing the XHTML document.
            table_id: The ID of the table to patch.

        """
        patch_labels = [
            ">Output Information Sequence",
            ">Gender Identity Code Sequence",
            ">Sex Parameters for Clinical Use Category Code Sequence",
            ">Pronoun Code Sequence",
        ]
        for label in patch_labels:
            target_element_id = self._search_element_id(dom, table_id, label)
            if not target_element_id:
                self.logger.warning(f"{label} Include Row element ID not found")
                continue

            # The ID belongs to an anchor inside the Include cell; its parent is
            # the element searched for the italic span holding the Include text.
            element = dom.find(id=target_element_id).find_parent()
            span_element = element.find("span", class_="italic")
            if not span_element:
                continue

            # Collect the matching text nodes before replacing any of them, so the
            # span's children are not modified while they are being iterated.
            children_to_modify = [
                child for child in span_element.children
                if isinstance(child, str) and ">Include" in child
            ]
            for child in children_to_modify:
                child.replace_with(child.replace(">Include", ">>Include"))

    def _search_element_id(self, dom, table_id, sequence_label):
        """Return the anchor ID of the Include row following a sequence row.

        Args:
            dom: The BeautifulSoup DOM object representing the XHTML document.
            table_id: The ID of the table to search.
            sequence_label: Exact first-cell text of the sequence attribute row.

        Returns:
            The ID of the anchor in the Include row, or None if the table or a
            matching Include row could not be found.

        """
        table = self.dom_utils.get_table(dom, table_id)
        if not table:
            return None
        self.logger.debug(f"Table with id {table_id} found")

        tr_elements = table.find_all("tr")
        include_id = self._search_sequence_include_id(tr_elements, sequence_label)
        if include_id is None:
            self.logger.debug("No <tr> matching criteria found")
        return include_id

    def _search_sequence_include_id(self, tr_elements, sequence_label):
        """Find the Include row right after the first row matching a sequence label.

        Only the first row whose first cell text equals `sequence_label` is
        considered; the row that follows it must start with '>Include'.

        Args:
            tr_elements: Iterable of <tr> elements to scan, in document order.
            sequence_label: Exact first-cell text of the sequence attribute row.

        Returns:
            The `id` of the first anchor carrying an ID in the Include row's first
            cell, or None if the sequence row, the Include row, or such an anchor
            is missing.

        """
        for tr in tr_elements:
            first_td = tr.find("td")
            if first_td is None or first_td.get_text(strip=True) != sequence_label:
                continue

            self.logger.debug(f"{sequence_label} row found")
            include_tr = tr.find_next("tr")
            if include_tr is None:
                return None
            include_td = include_tr.find("td")
            if include_td is None or not include_td.get_text(strip=True).startswith(">Include"):
                return None

            self.logger.debug("Include <tr> found")
            # Only accept an anchor that actually has an id: a cell without one
            # yields None (reported by the caller) instead of raising.
            anchor = include_td.find("a", id=True)
            return anchor["id"] if anchor is not None else None
        return None
|
__init__(config=None, logger=None)
Initialize the UPSXHTMLDocHandler.
Sets up the handler with the given configuration and logger, and creates a DOMUtils
instance for DOM navigation.
PARAMETER |
DESCRIPTION |
config
|
Configuration object for the handler.
TYPE:
optional
DEFAULT:
None
|
logger
|
Logger instance to use. If None, a default logger is created.
TYPE:
optional
DEFAULT:
None
|
Source code in src/dcmspec/ups_xhtml_doc_handler.py
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(self, config=None, logger=None):
    """Create a UPS XHTML document handler.

    Delegates configuration and logger setup to the base class, then attaches a
    DOMUtils helper used for DOM navigation.

    Args:
        config (optional): Configuration object for the handler.
        logger (optional): Logger instance to use. If None, a default logger is created.

    """
    super().__init__(config=config, logger=logger)
    # Read the logger only after the base initializer has run, and hand the
    # same instance to the DOM helper.
    handler_logger = self.logger
    self.dom_utils = DOMUtils(logger=handler_logger)
|
parse_dom(file_path)
Parse a cached XHTML file and apply UPS-specific table patching.
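Calls the base class's parse_dom, then patches the nesting level of the 'Include' rows under the affected sequence attributes in Table CC.2.5-3 (Output Information Sequence, Gender Identity Code Sequence, Sex Parameters for Clinical Use Category Code Sequence, and Pronoun Code Sequence).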
PARAMETER |
DESCRIPTION |
file_path
|
Path to the cached XHTML file to parse.
TYPE:
str
|
RETURNS |
DESCRIPTION |
BeautifulSoup
|
The patched DOM object.
TYPE:
BeautifulSoup
|
Source code in src/dcmspec/ups_xhtml_doc_handler.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def parse_dom(self, file_path: str) -> BeautifulSoup:
    """Parse a cached XHTML file, then apply the UPS table patch.

    The base class does the parsing; the resulting DOM is then passed to
    `_patch_table` for table "table_CC.2.5-3" and returned.

    Args:
        file_path (str): Path to the cached XHTML file to parse.

    Returns:
        BeautifulSoup: The patched DOM object.

    """
    ups_table_id = "table_CC.2.5-3"  # or pass table_id dynamically if needed
    patched_dom = super().parse_dom(file_path)
    # Patching happens in place on the freshly parsed DOM.
    self._patch_table(patched_dom, ups_table_id)
    return patched_dom
|