Source code for medkit.io.medkit_json.audio

# Public API of this module: loaders and savers for audio documents and
# audio annotations stored in medkit-json files.
__all__ = [
    "load_audio_document",
    "load_audio_documents",
    "load_audio_anns",
    "save_audio_document",
    "save_audio_documents",
    "save_audio_anns",
]

import json
from pathlib import Path
from typing import Iterable, Iterator, Optional, Union
import warnings

from medkit.core.audio import AudioDocument, Segment
from medkit.io.medkit_json._common import ContentType, build_header, check_header


# Used by save_audio_document to derive the default path of the separate
# annotations file from the document output path when `split_anns` is True
# and no explicit `anns_output_file` is given.
_DOC_ANNS_SUFFIX = "_anns.jsonl"


def load_audio_document(
    input_file: Union[str, Path],
    anns_input_file: Optional[Union[str, Path]] = None,
    encoding: Optional[str] = "utf-8",
) -> AudioDocument:
    """
    Read back a single audio document from a medkit-json file written by
    :func:`~medkit.io.medkit_json.save_audio_document`

    Parameters
    ----------
    input_file:
        Path to the medkit-json file holding the document
    anns_input_file:
        Optional medkit-json file holding annotations of the document that
        were saved separately. When given, each annotation it contains is
        added to the loaded document.
    encoding:
        Optional encoding used to read `input_file` and `anns_input_file`

    Returns
    -------
    AudioDocument
        The audio document stored in the file
    """
    with open(Path(input_file), encoding=encoding) as json_file:
        payload = json.load(json_file)
    # Make sure the file really holds a single audio document
    check_header(payload, ContentType.AUDIO_DOCUMENT)

    audio_doc = AudioDocument.from_dict(payload["content"])
    if anns_input_file is None:
        return audio_doc

    # Attach the annotations that were stored in their own file
    for segment in load_audio_anns(anns_input_file, encoding=encoding):
        audio_doc.anns.add(segment)
    return audio_doc
def load_audio_documents(
    input_file: Union[str, Path], encoding: Optional[str] = "utf-8"
) -> Iterator[AudioDocument]:
    """
    Lazily read the audio documents of a medkit-json file written by
    :func:`~medkit.io.medkit_json.save_audio_documents`

    Parameters
    ----------
    input_file:
        Path to the medkit-json file holding the documents
    encoding:
        Optional encoding used to read `input_file`

    Returns
    -------
    Iterator[AudioDocument]
        An iterator over the audio documents stored in the file
    """
    with open(Path(input_file), encoding=encoding) as jsonl_file:
        # The first line is the header describing the content of the file
        header = json.loads(jsonl_file.readline())
        check_header(header, ContentType.AUDIO_DOCUMENT_LIST)

        # Every following line is one serialized document
        for raw_doc in jsonl_file:
            yield AudioDocument.from_dict(json.loads(raw_doc))
def load_audio_anns(
    input_file: Union[str, Path], encoding: Optional[str] = "utf-8"
) -> Iterator[Segment]:
    """
    Lazily read the audio annotations of a medkit-json file written by
    :func:`~medkit.io.medkit_json.save_audio_anns`

    Parameters
    ----------
    input_file:
        Path to the medkit-json file holding the annotations
    encoding:
        Optional encoding used to read `input_file`

    Returns
    -------
    Iterator[Segment]
        An iterator over the audio annotations stored in the file
    """
    with open(Path(input_file), encoding=encoding) as jsonl_file:
        # The first line is the header describing the content of the file
        header = json.loads(jsonl_file.readline())
        check_header(header, ContentType.AUDIO_ANNOTATION_LIST)

        # Every following line is one serialized annotation
        for raw_ann in jsonl_file:
            yield Segment.from_dict(json.loads(raw_ann))
def save_audio_document(
    doc: AudioDocument,
    output_file: Union[str, Path],
    split_anns: bool = False,
    anns_output_file: Optional[Union[str, Path]] = None,
    encoding: Optional[str] = "utf-8",
):
    """
    Save an audio document into a medkit-json file.

    Parameters
    ----------
    doc:
        The audio document to save
    output_file:
        Path of the generated medkit-json file
    split_anns:
        If True, the annotations will be saved in a separate medkit-json file
        instead of being included in the main document file
    anns_output_file:
        Path of the medkit-json file storing the annotations if `split_anns`
        is True. If not provided, it is derived from `output_file` by
        replacing its extension with "_anns.jsonl" (e.g. "doc.json" gives
        "doc_anns.jsonl" in the same directory).
    encoding:
        Optional encoding of `output_file` and `anns_output_file`
    """
    output_file = Path(output_file)
    anns_output_file = Path(anns_output_file) if anns_output_file is not None else None

    if not split_anns and anns_output_file is not None:
        # stacklevel=2 attributes the warning to the caller rather than to
        # this function
        warnings.warn(
            "anns_output_file provided but split_anns is False so it will not be used",
            stacklevel=2,
        )

    data = build_header(content_type=ContentType.AUDIO_DOCUMENT)
    # Annotations are embedded in the document only when they are not split
    # into their own file
    data["content"] = doc.to_dict(with_anns=not split_anns)
    with open(output_file, mode="w", encoding=encoding) as fp:
        json.dump(data, fp, indent=4)

    if split_anns:
        if anns_output_file is None:
            # Path.with_suffix() rejects suffixes that do not start with a
            # dot (it raises ValueError for "_anns.jsonl"), so build the
            # sibling file name from the stem instead.
            anns_output_file = output_file.with_name(
                output_file.stem + _DOC_ANNS_SUFFIX
            )
        save_audio_anns(doc.anns, anns_output_file, encoding=encoding)
def save_audio_documents(
    docs: Iterable[AudioDocument],
    output_file: Union[str, Path],
    encoding: Optional[str] = "utf-8",
):
    """
    Write several audio documents into one medkit-json file.

    The file starts with a header line, followed by one JSON-serialized
    document per line.

    Parameters
    ----------
    docs:
        The audio documents to save
    output_file:
        Path of the generated medkit-json file
    encoding:
        Optional encoding of `output_file`
    """
    header_line = json.dumps(
        build_header(content_type=ContentType.AUDIO_DOCUMENT_LIST)
    )

    with open(Path(output_file), mode="w", encoding=encoding) as jsonl_file:
        jsonl_file.write(header_line + "\n")
        # Documents are serialized one at a time as the iterable is consumed
        jsonl_file.writelines(
            json.dumps(audio_doc.to_dict()) + "\n" for audio_doc in docs
        )
def save_audio_anns(
    anns: Iterable[Segment],
    output_file: Union[str, Path],
    encoding: Optional[str] = "utf-8",
):
    """
    Write audio annotations into a medkit-json file.

    The file starts with a header line, followed by one JSON-serialized
    annotation per line.

    Parameters
    ----------
    anns:
        The audio annotations to save
    output_file:
        Path of the generated medkit-json file
    encoding:
        Optional encoding of `output_file`
    """
    header_line = json.dumps(
        build_header(content_type=ContentType.AUDIO_ANNOTATION_LIST)
    )

    with open(Path(output_file), mode="w", encoding=encoding) as jsonl_file:
        jsonl_file.write(header_line + "\n")
        # Annotations are serialized one at a time as the iterable is consumed
        jsonl_file.writelines(
            json.dumps(segment.to_dict()) + "\n" for segment in anns
        )