Source code for medkit.text.postprocessing.attribute_duplicator
from __future__ import annotations
__all__ = ["AttributeDuplicator"]
from typing import TYPE_CHECKING
from medkit.core import Attribute, Operation
from medkit.text.postprocessing import alignment_utils
if TYPE_CHECKING:
from medkit.core.text import Segment
[docs]
class AttributeDuplicator(Operation):
"""Annotator to copy attributes from a source segment to its nested segments.
For each attribute to be duplicated, a new attribute is created in the nested segment
"""
def __init__(
self,
attr_labels: list[str],
uid: str | None = None,
):
"""Instantiate the attribute duplicator
Parameters
----------
attr_labels : list of str
Labels of the attributes to copy
uid : str, optional
Identifier of the annotator
"""
self.attr_labels = attr_labels
# Pass all arguments to super (remove self)
init_args = locals()
init_args.pop("self")
super().__init__(**init_args)
[docs]
def run(self, source_segments: list[Segment], target_segments: list[Segment]):
"""Add attributes from source segments to all nested segments.
The nested segments are chosen among the `target_segments` based on their spans.
Parameters
----------
source_segments : list of Segment
List of segments with attributes to copy
target_segments : list of Segment
List of segments target
"""
nested = alignment_utils.compute_nested_segments(source_segments, target_segments)
for parent, children in nested:
attrs_to_copy = [attr for label in self.attr_labels for attr in parent.attrs.get(label=label)]
# create a new attr in target from the source attr
for attr in attrs_to_copy:
for child in children:
self._duplicate_attr(attr=attr, target=child)
def _duplicate_attr(self, attr: Attribute, target: Segment):
target_attr = attr.copy()
target.attrs.add(target_attr)
if self._prov_tracer is not None:
self._prov_tracer.add_prov(target_attr, self.description, source_data_items=[attr])