# Source code for medkit.core.text.span
from __future__ import annotations
__all__ = ["AnySpan", "ModifiedSpan", "Span"]
import abc
import dataclasses
from typing import Any
from typing_extensions import Self
from medkit.core import dict_conv
[docs]
class AnySpan(abc.ABC, dict_conv.SubclassMapping):
    """Abstract base class of the span classes defined in this module.

    Every subclass is registered on ``AnySpan`` at class-creation time (see
    ``__init_subclass__``), which lets :meth:`from_dict` hand a serialized
    dict over to the ``from_dict`` of the subclass returned by
    ``get_subclass_for_data_dict``.
    """

    length: int

    def __init_subclass__(cls):
        # Register the new subclass on AnySpan itself, then run the parent
        # hook. The order of these two calls is kept as originally written.
        AnySpan.register_subclass(cls)
        super().__init_subclass__()

    @classmethod
    def from_dict(cls, ann_dict: dict[str, Any]) -> Self:
        """Build a span by delegating to the matching subclass

        Parameters
        ----------
        ann_dict : dict of str to Any
            Dict passed to ``get_subclass_for_data_dict`` to pick the
            subclass, then forwarded unchanged to that subclass's
            ``from_dict``

        Raises
        ------
        NotImplementedError
            If no subclass is found for `ann_dict`
        """
        target_cls = cls.get_subclass_for_data_dict(ann_dict)
        if target_cls is not None:
            return target_cls.from_dict(ann_dict)

        raise NotImplementedError(
            "AnySpan is an abstract class. Its class method `from_dict` is"
            " only used for calling the correct subclass `from_dict`"
        )

    @abc.abstractmethod
    def to_dict(self) -> dict[str, Any]:
        """Return a dict representation of the span (subclasses implement it)"""
        raise NotImplementedError
[docs]
@dataclasses.dataclass(frozen=True)
class Span(AnySpan):
    """Slice of text extracted from the original text

    Parameters
    ----------
    start : int
        Index of the first character in the original text
    end : int
        Index of the last character in the original text, plus one
    """

    start: int
    end: int

    @property
    def length(self):
        """Number of characters covered by the span, i.e. ``end - start``"""
        return self.end - self.start

    def to_dict(self) -> dict[str, Any]:
        """Return a dict with the `start` and `end` of the span

        The dict is also passed through
        ``dict_conv.add_class_name_to_data_dict`` before being returned.
        """
        data = dict(start=self.start, end=self.end)
        dict_conv.add_class_name_to_data_dict(self, data)
        return data

    def overlaps(self, other: Span):
        """Test if 2 spans reference at least one character in common"""
        # Each span must begin strictly before the other one ends.
        return self.start < other.end and other.start < self.end

    @classmethod
    def from_dict(cls, span_dict: dict[str, Any]) -> Self:
        """Creates a Span from a dict

        Parameters
        ----------
        span_dict: dict
            A dictionary from a serialized span as generated by to_dict()
        """
        start, end = span_dict["start"], span_dict["end"]
        return cls(start=start, end=end)
[docs]
@dataclasses.dataclass
class ModifiedSpan(AnySpan):
    """Slice of text not present in the original text

    Parameters
    ----------
    length : int
        Number of characters
    replaced_spans : list of Span
        Slices of the original text that this span is replacing
    """

    length: int
    replaced_spans: list[Span]

    def to_dict(self) -> dict[str, Any]:
        """Return a dict with the length and the serialized replaced spans

        Each replaced span is converted with its own ``to_dict()``. The
        resulting dict is also passed through
        ``dict_conv.add_class_name_to_data_dict`` before being returned.
        """
        serialized_spans = [span.to_dict() for span in self.replaced_spans]
        data = {"length": self.length, "replaced_spans": serialized_spans}
        dict_conv.add_class_name_to_data_dict(self, data)
        return data

    @classmethod
    def from_dict(cls, modified_span_dict: dict[str, Any]) -> Self:
        """Creates a ModifiedSpan from a dict

        Parameters
        ----------
        modified_span_dict : dict of str to Any
            A dictionary from a serialized ModifiedSpan as generated by to_dict()
        """
        # Rebuild the replaced spans first, then read the length.
        originals = [
            Span.from_dict(entry) for entry in modified_span_dict["replaced_spans"]
        ]
        length = modified_span_dict["length"]
        return cls(length, originals)