Utility class for DOM navigation and extraction in DICOM XHTML documents.
Provides methods for locating tables and table IDs within a parsed BeautifulSoup DOM,
with optional logging for warnings and debug information.
Typical usage:
dom_utils = DOMUtils(logger=logger)
table = dom_utils.get_table(dom, table_id)
table_id = dom_utils.get_table_id_from_section(dom, section_id)
Source code in src/dcmspec/dom_utils.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
class DOMUtils:
    """Utility class for DOM navigation and extraction in DICOM XHTML documents.

    Provides methods for locating tables and table IDs within a parsed BeautifulSoup DOM.
    Every failed lookup is reported through the instance logger as a warning and
    signalled to the caller by returning None.

    Typical usage:
        dom_utils = DOMUtils(logger=logger)
        table = dom_utils.get_table(dom, table_id)
        table_id = dom_utils.get_table_id_from_section(dom, section_id)

    """

    def __init__(self, logger: Optional[logging.Logger] = None):
        """Initialize DOMUtils with an optional logger.

        Args:
            logger (Optional[logging.Logger]): Logger instance to use for warnings.
                If None, a logger named after the class is obtained from logging.getLogger.

        Raises:
            TypeError: If logger is neither None nor a logging.Logger instance.

        """
        if logger is not None and not isinstance(logger, logging.Logger):
            raise TypeError("logger must be an instance of logging.Logger or None")
        self.logger = logger or logging.getLogger(self.__class__.__name__)

    def get_table(self, dom: BeautifulSoup, table_id: str) -> Optional[Tag]:
        """Retrieve the table element with the specified ID from the DOM.

        DocBook XML to XHTML conversion stylesheets enclose tables in a
        <div class="table"> with the table identifier in <a id="table_ID"></a>.

        Locates the anchor with the given ID, walks up to its enclosing
        <div class="table">, and returns the first <table> inside that div.

        Args:
            dom: The BeautifulSoup DOM object.
            table_id: The ID of the table to retrieve.

        Returns:
            The table element if found, otherwise None. A warning is logged
            for each step of the lookup that fails.

        """
        # An empty or None id cannot name an anchor, so stop before searching.
        # NOTE(review): BeautifulSoup may treat a falsy attribute filter as
        # "attribute absent" and match an unrelated id-less anchor - confirm
        # against the bs4 version in use.
        if not table_id:
            self.logger.warning("Table Id is empty; no table can be looked up.")
            return None

        anchor = dom.find("a", {"id": table_id})
        if anchor is None:
            self.logger.warning(f"Table Id {table_id} not found.")
            return None

        table_div = anchor.find_parent("div", class_="table")
        if not table_div:
            self.logger.warning(f"Parent <div class='table'> for Table Id {table_id} not found.")
            return None

        table = table_div.find("table")
        if not table:
            self.logger.warning(f"Table for Table Id {table_id} not found inside its <div class='table'>.")
            return None
        return table

    def get_table_id_from_section(self, dom: BeautifulSoup, section_id: str) -> Optional[str]:
        """Get the id of the first table in a section.

        Retrieve the first table_id (anchor id) of a <div class="table"> inside a <div class="section">
        that contains an <a> anchor with the given section id.

        Args:
            dom (BeautifulSoup): The parsed XHTML DOM object.
            section_id (str): The id of the section to search for the table_id.

        Returns:
            Optional[str]: The id of the first table anchor found, or None if not found.
            A warning is logged for each step of the lookup that fails.

        """
        # An empty or None id cannot name a section anchor, so stop before searching.
        # NOTE(review): BeautifulSoup may treat a falsy attribute filter as
        # "attribute absent" and match an unrelated id-less anchor - confirm
        # against the bs4 version in use.
        if not section_id:
            self.logger.warning("Section id is empty; no table id can be looked up.")
            return None

        # Find the anchor with the given id
        anchor = dom.find("a", {"id": section_id})
        if not anchor:
            self.logger.warning(f"Section with id '{section_id}' not found.")
            return None

        # Find the parent section div
        section_div = anchor.find_parent("div", class_="section")
        if not section_div:
            self.logger.warning(f"No parent <div class='section'> found for section id '{section_id}'.")
            return None

        # Find the first <div class="table"> inside the section
        table_div = section_div.find("div", class_="table")
        if not table_div:
            self.logger.warning(f"No <div class='table'> found in section for section id '{section_id}'.")
            return None

        # Find the first anchor with an id inside the table div (the table id).
        # The extra .get() check also rejects an anchor whose id is an empty string.
        table_anchor = table_div.find("a", id=True)
        if table_anchor and table_anchor.get("id"):
            return table_anchor["id"]

        self.logger.warning(f"No table id found in <div class='table'> for section id '{section_id}'.")
        return None
|
__init__(logger=None)
Initialize DOMUtils with an optional logger.
PARAMETER |
DESCRIPTION |
logger
|
Logger instance to use for warnings and debug messages.
If None, a default logger is created.
TYPE:
Optional[Logger]
DEFAULT:
None
|
Source code in src/dcmspec/dom_utils.py
28
29
30
31
32
33
34
35
36
37
38 | def __init__(self, logger: Optional[logging.Logger] = None):
"""Initialize DOMUtils with an optional logger.
Args:
logger (Optional[logging.Logger]): Logger instance to use for warnings and debug messages.
If None, a default logger is created.
"""
if logger is not None and not isinstance(logger, logging.Logger):
raise TypeError("logger must be an instance of logging.Logger or None")
self.logger = logger or logging.getLogger(self.__class__.__name__)
|
get_table(dom, table_id)
Retrieve the table element with the specified ID from the DOM.
DocBook XML to XHTML conversion stylesheets enclose tables in a
<div class="table"> with the table identifier in <a id="table_ID"></a>.
Searches for an anchor tag with the given ID and then returns the
table element inside that anchor's enclosing <div class="table">.
PARAMETER |
DESCRIPTION |
dom
|
The BeautifulSoup DOM object.
TYPE:
BeautifulSoup
|
table_id
|
The ID of the table to retrieve.
TYPE:
str
|
RETURNS |
DESCRIPTION |
Optional[Tag]
|
The table element if found, otherwise None.
|
Source code in src/dcmspec/dom_utils.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69 | def get_table(self, dom: BeautifulSoup, table_id: str) -> Optional[Tag]:
"""Retrieve the table element with the specified ID from the DOM.
DocBook XML to XHTML conversion stylesheets enclose tables in a
<div class="table"> with the table identifier in <a id="table_ID"></a>
Searches for an anchor tag with the given ID and then finds the next
table element.
Args:
dom: The BeautifulSoup DOM object.
table_id: The ID of the table to retrieve.
Returns:
The table element if found, otherwise None.
"""
anchor = dom.find("a", {"id": table_id})
if anchor is None:
self.logger.warning(f"Table Id {table_id} not found.")
return None
table_div = anchor.find_parent("div", class_="table")
if not table_div:
self.logger.warning(f"Parent <div class='table'> for Table Id {table_id} not found.")
return None
table = table_div.find("table")
if not table:
self.logger.warning(f"Table for Table Id {table_id} not found inside its <div class='table'>.")
return None
return table
|
get_table_id_from_section(dom, section_id)
Get the id of the first table in a section.
Retrieve the first table_id (anchor id) of a <div class="table">
inside a <div class="section">
that contains an <a>
anchor with the given section id.
PARAMETER |
DESCRIPTION |
dom
|
The parsed XHTML DOM object.
TYPE:
BeautifulSoup
|
section_id
|
The id of the section to search for the table_id.
TYPE:
str
|
RETURNS |
DESCRIPTION |
Optional[str]
|
The id of the first table anchor found, or None if not found.
|
Source code in src/dcmspec/dom_utils.py
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109 | def get_table_id_from_section(self, dom: BeautifulSoup, section_id: str) -> Optional[str]:
"""Get the id of the first table in a section.
Retrieve the first table_id (anchor id) of a <div class="table"> inside a <div class="section">
that contains an <a> anchor with the given section id.
Args:
dom (BeautifulSoup): The parsed XHTML DOM object.
section_id (str): The id of the section to search for the table_id.
Returns:
Optional[str]: The id of the first table anchor found, or None if not found.
"""
# Find the anchor with the given id
anchor = dom.find("a", {"id": section_id})
if not anchor:
self.logger.warning(f"Section with id '{section_id}' not found.")
return None
# Find the parent section div
section_div = anchor.find_parent("div", class_="section")
if not section_div:
self.logger.warning(f"No parent <div class='section'> found for section id '{section_id}'.")
return None
# Find the first <div class="table"> inside the section
table_div = section_div.find("div", class_="table")
if not table_div:
self.logger.warning(f"No <div class='table'> found in section for section id '{section_id}'.")
return None
# Find the first anchor with an id inside the table div (the table id)
table_anchor = table_div.find("a", id=True)
if table_anchor and table_anchor.get("id"):
return table_anchor["id"]
self.logger.warning(f"No table id found in <div class='table'> for section id '{section_id}'.")
return None
|