Source code for medkit.text.spacy.displacy_utils
from __future__ import annotations
__all__ = ["medkit_doc_to_displacy", "entities_to_displacy"]
from typing import Any, Callable
from medkit.core.text import Entity, TextDocument, span_utils
[docs]
def medkit_doc_to_displacy(
    medkit_doc: TextDocument,
    entity_labels: list[str] | None = None,
    entity_formatter: Callable[[Entity], str] | None = None,
    max_gap_length: int = 3,
) -> dict[str, Any]:
    """Build the displacy data dict used to visualize the entities of a document.

    The result is meant to be given to `displacy.render()` (with `manual=True`
    and `style="ent"`).

    Parameters
    ----------
    medkit_doc : TextDocument
        Document whose entities must be visualized.
    entity_labels : list of str, optional
        Labels of the entities to display, queried one label at a time in the
        given order. If `None` (or empty), all entities of the document are
        displayed.
    entity_formatter : Callable, optional
        Function returning the text to show as label for a given entity. If
        `None`, the entity label is used. Useful for instance to show
        normalization information stored in entity attributes.
    max_gap_length : int, default=3
        When cleaning up gaps in spans, spans around gaps smaller than
        `max_gap_length` will be merged.
        Cf :func:`~medkit.core.text.span_utils.clean_up_gaps_in_normalized_spans()`.

    Returns
    -------
    dict of str to Any
        Data to be passed to `displacy.render()` as `docs` argument
        (with `manual=True` and `style="ent"`)
    """
    if not entity_labels:
        # no label filter requested: take every entity of the document
        selected_entities = medkit_doc.anns.get_entities()
    else:
        # gather entities label by label, following the order of entity_labels
        selected_entities = []
        for wanted_label in entity_labels:
            selected_entities.extend(medkit_doc.anns.get_entities(label=wanted_label))

    return entities_to_displacy(
        selected_entities,
        medkit_doc.text,
        entity_formatter,
        max_gap_length,
    )
[docs]
def entities_to_displacy(
    entities: list[Entity],
    raw_text: str,
    entity_formatter: Callable[[Entity], str] | None = None,
    max_gap_length: int = 3,
) -> dict[str, Any]:
    """Build the displacy data dict used to visualize entities in their text.

    The result is meant to be given to `displacy.render()` (with `manual=True`
    and `style="ent"`).

    Parameters
    ----------
    entities : list of Entity
        Entities to visualize in text context.
    raw_text : str
        Initial document text from which the entities were extracted and to
        which their spans refer (typically the `text` attribute of a
        :class:`~medkit.core.text.document.TextDocument`).
    entity_formatter : Callable, optional
        Function returning the text to show as label for a given entity. If
        `None`, the entity label is used. Useful for instance to show
        normalization information stored in entity attributes.
    max_gap_length : int, default=3
        When cleaning up gaps in spans, spans around gaps smaller than
        `max_gap_length` will be merged.
        Cf :func:`~medkit.core.text.span_utils.clean_up_gaps_in_normalized_spans()`.

    Returns
    -------
    dict of str to Any
        Data to be passed to `displacy.render()` as `docs` argument
        (with `manual=True` and `style="ent"`): the raw text under the
        `"text"` key and, under `"ents"`, one `start`/`end`/`label` dict per
        displayed span, ordered by start offset.
    """
    displacy_ents = []

    for ent in entities:
        spans = span_utils.normalize_spans(ent.spans)
        if spans:
            # merge spans that are only separated by a small gap
            merged_spans = span_utils.clean_up_gaps_in_normalized_spans(
                spans,
                raw_text,
                max_gap_length=max_gap_length,
            )

            # text displayed for this entity
            if entity_formatter:
                displayed_label = entity_formatter(ent)
            else:
                displayed_label = ent.label

            # one displacy entry per remaining span, all sharing the same label
            displacy_ents.extend(
                {"start": s.start, "end": s.end, "label": displayed_label} for s in merged_spans
            )
        # else: nothing to display for this entity; normalized spans can be
        # empty if spans contained ModifiedSpan with no replaced_spans

    # stable sort: entries with the same start keep their insertion order
    displacy_ents.sort(key=lambda entry: entry["start"])

    return {"text": raw_text, "ents": displacy_ents}