Source code for medkit.text.spacy.displacy_utils

__all__ = ["medkit_doc_to_displacy", "entities_to_displacy"]

from typing import Any, Callable, Dict, List, Optional

from medkit.core.text import TextDocument, Segment, span_utils


def medkit_doc_to_displacy(
    medkit_doc: TextDocument,
    entity_labels: Optional[List[str]] = None,
    entity_formatter: Optional[Callable[[Segment], str]] = None,
    max_gap_length: int = 3,
) -> Dict[str, Any]:
    """Build the data dict used to visualize a document's entities with displacy.

    The result is meant to be passed to `displacy.render()` with
    `manual=True` and `style="ent"`.

    Parameters
    ----------
    medkit_doc:
        Document whose entities should be visualized.
    entity_labels:
        Labels of the entities to display. If `None` (or empty), all
        entities of the document are displayed.
    entity_formatter:
        Optional function returning the text to display as label for a
        given entity. If `None`, the entity label is used. Can be used for
        instance to display normalization information available in entity
        attributes.
    max_gap_length:
        When cleaning up gaps in spans, spans around gaps smaller than
        `max_gap_length` will be merged. Cf
        :func:`~medkit.core.text.span_utils.clean_up_gaps_in_normalized_spans()`.

    Returns
    -------
    Dict[str, Any]
        Data to be passed to `displacy.render()` as `docs` argument (with
        `manual=True` and `style="ent"`)
    """
    if not entity_labels:
        # no label filter requested: take every entity of the document
        selected = medkit_doc.anns.get_entities()
    else:
        # gather entities label by label, following the order of entity_labels
        selected = []
        for wanted_label in entity_labels:
            selected.extend(medkit_doc.anns.get_entities(label=wanted_label))

    return entities_to_displacy(
        selected, medkit_doc.text, entity_formatter, max_gap_length
    )
def entities_to_displacy(
    entities: List[Segment],
    raw_text: str,
    entity_formatter: Optional[Callable[[Segment], str]] = None,
    max_gap_length: int = 3,
) -> Dict[str, Any]:
    """Build the data dict used to visualize a list of entities with displacy.

    The result is meant to be passed to `displacy.render()` with
    `manual=True` and `style="ent"`.

    Parameters
    ----------
    entities:
        Entities to visualize in text context.
    raw_text:
        Initial document text from which the entities were extracted and to
        which their spans refer (typically the `text` attribute of a
        :class:`~medkit.core.text.document.TextDocument`).
    entity_formatter:
        Optional function returning the text to display as label for a
        given entity. If `None`, the entity label is used. Can be used for
        instance to display normalization information available in entity
        attributes.
    max_gap_length:
        When cleaning up gaps in spans, spans around gaps smaller than
        `max_gap_length` will be merged. Cf
        :func:`~medkit.core.text.span_utils.clean_up_gaps_in_normalized_spans()`.

    Returns
    -------
    Dict[str, Any]
        Data to be passed to `displacy.render()` as `docs` argument (with
        `manual=True` and `style="ent"`): a dict with the raw text under
        `"text"` and the entity entries, ordered by start offset, under
        `"ents"`.
    """
    displacy_ents = []

    for ent in entities:
        spans = span_utils.normalize_spans(ent.spans)
        # normalized spans can be empty if the spans contained ModifiedSpan
        # objects with no replaced_spans; such entities are skipped
        if spans:
            # merge spans separated only by small gaps
            merged = span_utils.clean_up_gaps_in_normalized_spans(
                spans, raw_text, max_gap_length=max_gap_length
            )
            # text shown next to the highlighted span(s)
            display_label = entity_formatter(ent) if entity_formatter else ent.label
            # one displacy entry per remaining span, all sharing the same label
            for span in merged:
                displacy_ents.append(
                    {"start": span.start, "end": span.end, "label": display_label}
                )

    # order entries by start offset (stable sort, ties keep insertion order)
    displacy_ents.sort(key=lambda item: item["start"])
    return {"text": raw_text, "ents": displacy_ents}