# Source code for medkit.text.postprocessing.attribute_duplicator
# Public API of this module: the names exported by a wildcard import
__all__ = ["AttributeDuplicator"]
from typing import List, Optional
from medkit.core import Attribute, Operation
from medkit.core.text import Segment
from medkit.text.postprocessing import alignment_utils
class AttributeDuplicator(Operation):
    """Operation copying attributes from source segments to the segments nested in them.

    For every attribute selected for duplication, a copy is made and attached
    to the nested segment.
    """

    def __init__(
        self,
        attr_labels: List[str],
        uid: Optional[str] = None,
    ):
        """Set up the attribute duplicator

        Parameters
        ----------
        attr_labels:
            Labels of the attributes that must be copied
        uid:
            Identifier of the annotator
        """
        self.attr_labels = attr_labels

        # Forward every constructor argument to the parent class, minus `self`.
        # NOTE: do not introduce any local variable above this point, it would
        # be captured by `locals()` and forwarded as well.
        init_args = locals()
        del init_args["self"]
        super().__init__(**init_args)

    def run(self, source_segments: List[Segment], target_segments: List[Segment]):
        """Copy the configured attributes of each source segment to its nested segments.

        The nested segments are picked among `target_segments` according to
        their spans.

        Parameters
        ----------
        source_segments:
            Segments holding the attributes to copy
        target_segments:
            Candidate segments that may receive the copied attributes
        """
        nesting = alignment_utils.compute_nested_segments(
            source_segments, target_segments
        )

        for source, nested_targets in nesting:
            # gather everything to copy up front, label by label
            selected_attrs = []
            for label in self.attr_labels:
                selected_attrs.extend(source.attrs.get(label=label))

            # then attach a copy of each attribute to every nested segment
            for source_attr in selected_attrs:
                for nested_target in nested_targets:
                    self._duplicate_attr(attr=source_attr, target=nested_target)

    def _duplicate_attr(self, attr: Attribute, target: Segment):
        """Attach a copy of `attr` to `target`, recording provenance if a tracer is set."""
        duplicate = attr.copy()
        target.attrs.add(duplicate)

        # nothing more to do when provenance is not being traced
        if self._prov_tracer is None:
            return

        self._prov_tracer.add_prov(
            duplicate, self.description, source_data_items=[attr]
        )