from __future__ import annotations
__all__ = [
"load_audio_document",
"load_audio_documents",
"load_audio_anns",
"save_audio_document",
"save_audio_documents",
"save_audio_anns",
]
import json
import warnings
from pathlib import Path
from typing import Iterable, Iterator
from medkit.core.audio import AudioDocument, Segment
from medkit.io.medkit_json._common import ContentType, build_header, check_header
# Filename ending used by save_audio_document() to derive the default path of
# the separate annotations file when `split_anns` is True and no explicit
# `anns_output_file` is given.
_DOC_ANNS_SUFFIX = "_anns.jsonl"
[docs]
def load_audio_document(
    input_file: str | Path,
    anns_input_file: str | Path | None = None,
    encoding: str | None = "utf-8",
) -> AudioDocument:
    """Read a single audio document from a medkit-json file.

    The file is expected to have been produced by
    :func:`~medkit.io.medkit_json.save_audio_document`.

    Parameters
    ----------
    input_file : str or Path
        Path to the medkit-json file holding the document
    anns_input_file : str or Path, optional
        Path to a separate medkit-json file holding annotations of the
        document. When given, each annotation it contains is added to the
        loaded document.
    encoding : str, default="utf-8"
        Encoding used to read `input_file` and `anns_input_file`

    Returns
    -------
    AudioDocument
        The audio document stored in the file
    """
    with Path(input_file).open(encoding=encoding) as fp:
        payload = json.load(fp)

    # The header must declare a single audio document as content.
    check_header(payload, ContentType.AUDIO_DOCUMENT)
    audio_doc = AudioDocument.from_dict(payload["content"])

    if anns_input_file is None:
        return audio_doc

    # Attach the annotations stored in the side file.
    for segment in load_audio_anns(anns_input_file, encoding=encoding):
        audio_doc.anns.add(segment)
    return audio_doc
[docs]
def load_audio_documents(input_file: str | Path, encoding: str | None = "utf-8") -> Iterator[AudioDocument]:
    """Iterate over the audio documents stored in a medkit-json file.

    The file is expected to have been produced by
    :func:`~medkit.io.medkit_json.save_audio_documents`: a header line
    followed by one JSON-serialized document per line.

    This is a generator: the file is only opened, and its header only
    checked, once iteration starts.

    Parameters
    ----------
    input_file : str or Path
        Path to the medkit-json file holding the documents
    encoding : str, default="utf-8"
        Encoding used to read `input_file`

    Returns
    -------
    iterator of AudioDocument
        An iterator over the audio documents in the file
    """
    with Path(input_file).open(encoding=encoding) as stream:
        # The first line is the header describing the file content.
        header = json.loads(stream.readline())
        check_header(header, ContentType.AUDIO_DOCUMENT_LIST)

        # Every remaining line is one serialized document.
        yield from (AudioDocument.from_dict(json.loads(raw_line)) for raw_line in stream)
[docs]
def load_audio_anns(input_file: str | Path, encoding: str | None = "utf-8") -> Iterator[Segment]:
    """Iterate over the audio annotations stored in a medkit-json file.

    The file is expected to have been produced by
    :func:`~medkit.io.medkit_json.save_audio_anns`: a header line followed
    by one JSON-serialized annotation per line.

    This is a generator: the file is only opened, and its header only
    checked, once iteration starts.

    Parameters
    ----------
    input_file : str or Path
        Path to the medkit-json file holding the annotations
    encoding : str, default="utf-8"
        Encoding used to read `input_file`

    Returns
    -------
    iterator of Segment
        An iterator over the audio annotations in the file
    """
    with Path(input_file).open(encoding=encoding) as stream:
        # The first line is the header describing the file content.
        first_line = stream.readline()
        check_header(json.loads(first_line), ContentType.AUDIO_ANNOTATION_LIST)

        # Every remaining line is one serialized annotation.
        for raw_line in stream:
            yield Segment.from_dict(json.loads(raw_line))
[docs]
def save_audio_document(
    doc: AudioDocument,
    output_file: str | Path,
    split_anns: bool = False,
    anns_output_file: str | Path | None = None,
    encoding: str | None = "utf-8",
):
    """Save an audio document into a medkit-json file.

    Parameters
    ----------
    doc : AudioDocument
        The audio document to save
    output_file : str or Path
        Path of the generated medkit-json file
    split_anns : bool, default=False
        If True, the annotations will be saved in a separate medkit-json file
        instead of being included in the main document file
    anns_output_file : str or Path, optional
        Path of the medkit-json file storing the annotations if `split_anns`
        is True. If not provided, it is derived from `output_file` by
        replacing its extension with "_anns.jsonl" (e.g. "doc.json" gives
        "doc_anns.jsonl" in the same directory). A warning is emitted if it
        is provided while `split_anns` is False.
    encoding : str, default="utf-8"
        Optional encoding of `output_file` and `anns_output_file`
    """
    output_file = Path(output_file)
    anns_output_file = Path(anns_output_file) if anns_output_file else None

    if not split_anns and anns_output_file is not None:
        warnings.warn(
            "anns_output_file provided but split_anns is False so it will not be used",
            stacklevel=2,
        )

    data = build_header(content_type=ContentType.AUDIO_DOCUMENT)
    # Annotations are embedded in the document only when they are not split out.
    data["content"] = doc.to_dict(with_anns=not split_anns)
    with output_file.open(mode="w", encoding=encoding) as fp:
        json.dump(data, fp, ensure_ascii=False, indent=4)

    if split_anns:
        if anns_output_file is None:
            # Path.with_suffix() raises ValueError for a suffix that does not
            # start with ".", so build the sibling file name explicitly.
            anns_output_file = output_file.with_name(output_file.stem + _DOC_ANNS_SUFFIX)
        save_audio_anns(doc.anns, anns_output_file, encoding=encoding)
[docs]
def save_audio_documents(
    docs: Iterable[AudioDocument],
    output_file: str | Path,
    encoding: str | None = "utf-8",
):
    """Write several audio documents into one medkit-json file.

    The file starts with a header line, followed by one JSON-serialized
    document per line.

    Parameters
    ----------
    docs : iterable of AudioDocument
        The audio documents to save
    output_file : str or Path
        Path of the generated medkit-json file
    encoding : str, default="utf-8"
        Encoding used to write `output_file`
    """
    file_header = build_header(content_type=ContentType.AUDIO_DOCUMENT_LIST)

    with Path(output_file).open(mode="w", encoding=encoding) as out:
        out.write(f"{json.dumps(file_header, ensure_ascii=False)}\n")
        for audio_doc in docs:
            serialized = json.dumps(audio_doc.to_dict(), ensure_ascii=False)
            out.write(f"{serialized}\n")
[docs]
def save_audio_anns(
    anns: Iterable[Segment],
    output_file: str | Path,
    encoding: str | None = "utf-8",
):
    """Write audio annotations into a medkit-json file.

    The file starts with a header line, followed by one JSON-serialized
    annotation per line.

    Parameters
    ----------
    anns : iterable of Segment
        The audio annotations to save
    output_file : str or Path
        Path of the generated medkit-json file
    encoding : str, default="utf-8"
        Encoding used to write `output_file`
    """
    file_header = build_header(content_type=ContentType.AUDIO_ANNOTATION_LIST)

    with Path(output_file).open(mode="w", encoding=encoding) as out:
        out.write(json.dumps(file_header, ensure_ascii=False) + "\n")
        for segment in anns:
            segment_line = json.dumps(segment.to_dict(), ensure_ascii=False) + "\n"
            out.write(segment_line)