"""
This package needs extra dependencies that are not installed as core dependencies of medkit.
To install them, use `pip install medkit[edsnlp]`.
"""
# Public API of this module
__all__ = [
    # pipeline classes
    "EDSNLPPipeline",
    "EDSNLPDocPipeline",
    # factories converting EDS-NLP span attributes to medkit attributes
    "build_date_attribute",
    "build_duration_attribute",
    "build_adicap_attribute",
    "build_tnm_attribute",
    "build_measurement_attribute",
    # mapping of attribute labels to the factories above
    "DEFAULT_ATTRIBUTE_FACTORIES",
]
from typing import Callable, Dict, Optional, List
from edsnlp.pipelines.ner.adicap.models import AdicapCode as EDSNLP_AdicapCode
from edsnlp.pipelines.ner.tnm.model import TNM as EDSNLP_TNM
from edsnlp.pipelines.misc.dates.models import (
AbsoluteDate as EDSNLP_AbsoluteDate,
RelativeDate as EDSNLP_RelativeDate,
Duration as EDSNLP_Duration,
Direction as EDSNLP_Direction,
)
from edsnlp.pipelines.misc.measurements.measurements import (
SimpleMeasurement as EDSNLP_SimpleMeasurement,
)
from spacy import Language
from spacy.tokens import Span as SpacySpan
from spacy.tokens.underscore import Underscore
from medkit.core import Attribute
from medkit.text.ner import (
ADICAPNormAttribute,
DateAttribute,
RelativeDateAttribute,
RelativeDateDirection,
DurationAttribute,
)
from medkit.text.ner.tnm_attribute import TNMAttribute
from medkit.text.spacy import SpacyPipeline, SpacyDocPipeline
def build_date_attribute(spacy_span: SpacySpan, spacy_label: str) -> Attribute:
    """
    Convert an EDS-NLP date object stored on a spaCy span into a medkit
    date attribute.

    Parameters
    ----------
    spacy_span
        Spacy span having an EDS-NLP date attribute
    spacy_label
        Label of the date attribute on `spacy_span`. Ex: "date", "consultation_date"

    Returns
    -------
    Attribute
        :class:`~medkit.text.ner.DateAttribute` when the EDS-NLP value is an
        absolute date, :class:`~medkit.text.ner.RelativeDateAttribute` when
        it is a relative date

    Raises
    ------
    ValueError
        If the value is neither an EDS-NLP absolute date nor a relative date
    """
    eds_date = spacy_span._.get(spacy_label)

    # absolute dates map field-for-field onto DateAttribute
    if isinstance(eds_date, EDSNLP_AbsoluteDate):
        return DateAttribute(
            label=spacy_label,
            year=eds_date.year,
            month=eds_date.month,
            day=eds_date.day,
            hour=eds_date.hour,
            minute=eds_date.minute,
            second=eds_date.second,
        )

    if not isinstance(eds_date, EDSNLP_RelativeDate):
        raise ValueError(
            f"Unexpected value type: {type(eds_date)} for spaCy attribute with label"
            f" '{spacy_label}'"
        )

    # any direction other than PAST is mapped to FUTURE
    if eds_date.direction is EDSNLP_Direction.PAST:
        direction = RelativeDateDirection.PAST
    else:
        direction = RelativeDateDirection.FUTURE

    return RelativeDateAttribute(
        label=spacy_label,
        direction=direction,
        years=eds_date.year,
        months=eds_date.month,
        weeks=eds_date.week,
        days=eds_date.day,
        hours=eds_date.hour,
        minutes=eds_date.minute,
        seconds=eds_date.second,
    )
def build_duration_attribute(
    spacy_span: SpacySpan, spacy_label: str
) -> DurationAttribute:
    """
    Convert an EDS-NLP duration object stored on a spaCy span into a medkit
    duration attribute.

    Parameters
    ----------
    spacy_span
        Spacy span having an EDS-NLP duration attribute
    spacy_label
        Label of the duration attribute on `spacy_span`. Ex: "duration"

    Returns
    -------
    DurationAttribute
        Medkit duration attribute

    Raises
    ------
    AssertionError
        If the value is not an EDS-NLP duration object (only when assertions
        are enabled)
    """
    eds_duration = spacy_span._.get(spacy_label)
    assert isinstance(eds_duration, EDSNLP_Duration)

    # EDS-NLP uses singular field names ("year"), medkit the plural form ("years")
    units = ("year", "month", "week", "day", "hour", "minute", "second")
    amounts = {f"{unit}s": getattr(eds_duration, unit) for unit in units}
    return DurationAttribute(label=spacy_label, **amounts)
def build_adicap_attribute(
    spacy_span: SpacySpan, spacy_label: str
) -> ADICAPNormAttribute:
    """
    Convert an EDS-NLP ADICAP object stored on a spaCy span into a medkit
    ADICAP normalization attribute.

    Parameters
    ----------
    spacy_span
        Spacy span having an ADICAP object as value
    spacy_label
        Label of the attribute on `spacy_span`. Ex: "adicap"

    Returns
    -------
    ADICAPNormAttribute
        Medkit ADICAP normalization attribute

    Raises
    ------
    AssertionError
        If the value is not an EDS-NLP ADICAP code object (only when
        assertions are enabled)
    """
    adicap = spacy_span._.get(spacy_label)
    assert isinstance(adicap, EDSNLP_AdicapCode)

    # fields carry the same name on the EDS-NLP object and the medkit attribute
    field_names = (
        "code",
        "sampling_mode",
        "technic",
        "organ",
        "pathology",
        "pathology_type",
        "behaviour_type",
    )
    return ADICAPNormAttribute(
        **{field: getattr(adicap, field) for field in field_names}
    )
def build_tnm_attribute(spacy_span: SpacySpan, spacy_label: str) -> TNMAttribute:
    """
    Convert an EDS-NLP TNM object stored on a spaCy span into a medkit TNM
    attribute.

    Parameters
    ----------
    spacy_span
        Spacy span having a TNM object as value
    spacy_label
        Label of the attribute on `spacy_span`. Ex: "tnm"

    Returns
    -------
    TNMAttribute
        Medkit TNM attribute

    Raises
    ------
    AssertionError
        If the value is not an EDS-NLP TNM object (only when assertions are
        enabled)
    """
    tnm = spacy_span._.get(spacy_label)
    assert isinstance(tnm, EDSNLP_TNM)

    # fields carry the same name on the EDS-NLP object and the medkit attribute
    field_names = (
        "prefix",
        "tumour",
        "tumour_specification",
        "node",
        "node_specification",
        "node_suffix",
        "metastasis",
        "resection_completeness",
        "version",
        "version_year",
    )
    return TNMAttribute(**{field: getattr(tnm, field) for field in field_names})
def build_measurement_attribute(spacy_span: SpacySpan, spacy_label: str) -> Attribute:
    """
    Convert an EDS-NLP measurement object stored on a spaCy span into a
    generic medkit attribute.

    Parameters
    ----------
    spacy_span
        Spacy span having a measurement object as value
    spacy_label
        Label of the attribute on `spacy_span`. Ex: "size", "weight", "bmi"

    Returns
    -------
    Attribute
        Medkit attribute holding the measurement value, with the measurement
        unit stored under the "unit" metadata key

    Raises
    ------
    AssertionError
        If the value is not an EDS-NLP simple measurement object (only when
        assertions are enabled)
    """
    measurement = spacy_span._.get(spacy_label)
    assert isinstance(measurement, EDSNLP_SimpleMeasurement)

    amount = measurement.value
    unit_metadata = {"unit": measurement.unit}
    return Attribute(label=spacy_label, value=amount, metadata=unit_metadata)
# Maps the label of an EDS-NLP span attribute to the factory that converts
# it into a medkit attribute.
DEFAULT_ATTRIBUTE_FACTORIES = {
    # from eds.adicap
    "adicap": build_adicap_attribute,
    # from eds.tnm
    "tnm": build_tnm_attribute,
    # from eds.dates
    "date": build_date_attribute,
    "duration": build_duration_attribute,
    # from eds.consultation_dates
    "consultation_date": build_date_attribute,
    # from eds.measurements: every measurement label shares the same factory
    **dict.fromkeys(
        ("weight", "size", "bmi", "volume"),
        build_measurement_attribute,
    ),
}
"""Pre-defined attribute factories to handle EDS-NLP attributes"""
# Labels of spaCy span extensions that are skipped when the pipelines below
# select attributes by default (i.e. when `spacy_attrs` is None).
_ATTR_LABELS_TO_IGNORE = {
    # seems to always have an identical attr with a more specific label
    # since EDSNLP 0.9
    "value",
    # text after spaCy preprocessing
    "normalized_variant",
    # should be in metadata of entities matched by eds.contextual-matcher but we don't support that
    "assigned",
    "source",
    # declared but unused attribute of eds.dates
    "datetime",
    # unsupported experimental feature of eds.dates
    # (the trailing comma matters: without it, "period" and "ents_reason"
    # are implicitly concatenated into the single string "periodents_reason")
    "period",
    # ignored because each entity matched by eds.reason will also have its own is_reason attribute
    "ents_reason",
    # redundant with score attr with more specific label
    "score_value",
    # could be in metadata of score attrs but not worth the trouble
    "score_method",
    # context/qualifying attributes with deprecated aliases
    # cues could be included in metadata but not worth the trouble
    "family_",
    "family_cues",
    "history_",
    "history_cues",
    "recent_cues",
    "antecedents",
    "antecedents_",
    "antecedents_cues",
    "antecedent",
    "antecedent_",
    "antecedent_cues",
    "hypothesis_",
    "hypothesis_cues",
    "negation_",
    "negated",
    "polarity_",
    "negation_cues",
    "reported_speech_",
    "reported_speech_cues",
}
class EDSNLPPipeline(SpacyPipeline):
    """Segment annotator relying on an EDS-NLP pipeline"""

    def __init__(
        self,
        nlp: Language,
        spacy_entities: Optional[List[str]] = None,
        spacy_span_groups: Optional[List[str]] = None,
        spacy_attrs: Optional[List[str]] = None,
        medkit_attribute_factories: Optional[
            Dict[str, Callable[[SpacySpan, str], Attribute]]
        ] = None,
        name: Optional[str] = None,
        uid: Optional[str] = None,
    ):
        """Initialize the segment annotator

        Parameters
        ----------
        nlp:
            Language object with the loaded pipeline from Spacy
        spacy_entities:
            Labels of new spacy entities (`doc.ents`) to convert into medkit entities.
            If `None` (default) all the new spacy entities will be converted
        spacy_span_groups:
            Name of new spacy span groups (`doc.spans`) to convert into medkit segments.
            If `None` (default) new spacy span groups will be converted
        spacy_attrs:
            Name of span extensions to convert into medkit attributes. If
            `None`, every span extension found in spaCy's
            `Underscore.span_extensions` is used, except the labels listed
            in `_ATTR_LABELS_TO_IGNORE`.
        medkit_attribute_factories:
            Mapping of factories in charge of converting spacy attributes to
            medkit attributes. Factories will receive a spacy span and an
            attribute label when called. The key in the mapping is the attribute
            label. Entries given here are merged on top of the pre-defined
            default factories listed in :const:`~DEFAULT_ATTRIBUTE_FACTORIES`
            and take precedence over them.
        name:
            Name describing the pipeline (defaults to the class name).
        uid:
            Identifier of the pipeline
        """
        # user-supplied factories override the pre-defined ones label by label
        factories = (
            DEFAULT_ATTRIBUTE_FACTORIES
            if medkit_attribute_factories is None
            else {**DEFAULT_ATTRIBUTE_FACTORIES, **medkit_attribute_factories}
        )

        # default to all span attributes except blacklisted ones
        attr_labels = (
            spacy_attrs
            if spacy_attrs is not None
            else [
                label
                for label in Underscore.span_extensions
                if label not in _ATTR_LABELS_TO_IGNORE
            ]
        )

        super().__init__(
            nlp=nlp,
            spacy_entities=spacy_entities,
            spacy_span_groups=spacy_span_groups,
            spacy_attrs=attr_labels,
            medkit_attribute_factories=factories,
            name=name,
            uid=uid,
        )
class EDSNLPDocPipeline(SpacyDocPipeline):
    """
    DocPipeline to obtain annotations created using EDS-NLP
    """

    def __init__(
        self,
        nlp: Language,
        medkit_labels_anns: Optional[List[str]] = None,
        medkit_attrs: Optional[List[str]] = None,
        spacy_entities: Optional[List[str]] = None,
        spacy_span_groups: Optional[List[str]] = None,
        spacy_attrs: Optional[List[str]] = None,
        medkit_attribute_factories: Optional[
            Dict[str, Callable[[SpacySpan, str], Attribute]]
        ] = None,
        name: Optional[str] = None,
        uid: Optional[str] = None,
    ):
        """Initialize the pipeline

        Parameters
        ----------
        nlp:
            Language object with the loaded pipeline from Spacy
        medkit_labels_anns:
            Labels of medkit annotations to include in the spacy document.
            If `None` (default) all the annotations will be included.
        medkit_attrs:
            Labels of medkit attributes to add in the annotations that will be included.
            If `None` (default) all the attributes will be added as `custom attributes`
            in each annotation included.
        spacy_entities:
            Labels of new spacy entities (`doc.ents`) to convert into medkit entities.
            If `None` (default) all the new spacy entities will be converted and added into
            its origin medkit document.
        spacy_span_groups:
            Name of new spacy span groups (`doc.spans`) to convert into medkit segments.
            If `None` (default) new spacy span groups will be converted and added into
            its origin medkit document.
        spacy_attrs:
            Name of span extensions to convert into medkit attributes. If
            `None`, every span extension found in spaCy's
            `Underscore.span_extensions` is used, except the labels listed
            in `_ATTR_LABELS_TO_IGNORE`.
        medkit_attribute_factories:
            Mapping of factories in charge of converting spacy attributes to
            medkit attributes. Factories will receive a spacy span and an
            attribute label when called. The key in the mapping is the attribute
            label. Entries given here are merged on top of the pre-defined
            default factories listed in :const:`~DEFAULT_ATTRIBUTE_FACTORIES`
            and take precedence over them.
        name:
            Name describing the pipeline (defaults to the class name).
        uid:
            Identifier of the pipeline
        """
        # user-supplied factories override the pre-defined ones label by label
        factories = (
            DEFAULT_ATTRIBUTE_FACTORIES
            if medkit_attribute_factories is None
            else {**DEFAULT_ATTRIBUTE_FACTORIES, **medkit_attribute_factories}
        )

        # default to all span attributes except blacklisted ones
        attr_labels = (
            spacy_attrs
            if spacy_attrs is not None
            else [
                label
                for label in Underscore.span_extensions
                if label not in _ATTR_LABELS_TO_IGNORE
            ]
        )

        super().__init__(
            nlp=nlp,
            medkit_labels_anns=medkit_labels_anns,
            medkit_attrs=medkit_attrs,
            spacy_entities=spacy_entities,
            spacy_span_groups=spacy_span_groups,
            spacy_attrs=attr_labels,
            medkit_attribute_factories=factories,
            name=name,
            uid=uid,
        )