Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from typing import Iterable, Iterator, List, Union | |
| from .schemas import ManifestItem | |
def read_jsonl(path: Union[str, Path]) -> Iterator[dict]:
    """Lazily yield one parsed JSON value per non-blank line of a JSONL file.

    Lines are stripped of surrounding whitespace before parsing, and lines
    that are empty after stripping are skipped. The file stays open only
    while the generator is being consumed.

    Args:
        path: Location of the JSONL file, as a string or ``Path``.

    Yields:
        The value decoded from each non-blank line (annotated as ``dict``;
        the code itself does not check the decoded type).

    Raises:
        json.JSONDecodeError: If a line is not valid JSON. The message names
            the file and the 1-based line number of the offending line.
        OSError: If the file cannot be opened.
    """
    path = Path(path)
    # "utf-8-sig" decodes plain UTF-8 exactly like "utf-8" but also drops a
    # leading byte-order mark, which json.loads would otherwise reject on the
    # first line of files written by BOM-emitting tools.
    with path.open("r", encoding="utf-8-sig") as f:
        for lineno, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            # Parse outside the yield so that exceptions thrown into the
            # generator by the consumer are never mistaken for parse errors.
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                # Same exception type as before, with file/line context added.
                raise json.JSONDecodeError(
                    f"{err.msg} ({path}, line {lineno})", err.doc, err.pos
                ) from err
            yield record
def write_jsonl(path: Union[str, Path], records: Iterable[dict]) -> None:
    """Write ``records`` to ``path`` as JSON Lines, replacing any existing file.

    Missing parent directories are created first. Each record is serialized
    with ``ensure_ascii=False`` (non-ASCII characters are written as-is in
    UTF-8) and terminated by a newline.

    Args:
        path: Destination file, as a string or ``Path``.
        records: Iterable of JSON-serializable records; consumed once, in order.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as out:
        # The generator is consumed lazily, so records are serialized and
        # written one at a time, in iteration order.
        out.writelines(
            json.dumps(item, ensure_ascii=False) + "\n" for item in records
        )
def append_jsonl(path: Union[str, Path], record: dict) -> None:
    """Append a single record to ``path`` as one JSON line.

    Missing parent directories are created, and the file is created if it
    does not exist. The record is serialized with ``ensure_ascii=False`` and
    followed by a newline.

    Args:
        path: Destination file, as a string or ``Path``.
        record: JSON-serializable record to append.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("a", encoding="utf-8") as out:
        serialized = json.dumps(record, ensure_ascii=False)
        out.write(f"{serialized}\n")
def load_manifest(path: Union[str, Path]) -> List[ManifestItem]:
    """Load every record of a JSONL manifest as a ``ManifestItem``.

    Each value yielded by ``read_jsonl(path)`` is unpacked as keyword
    arguments to ``ManifestItem``, so every record must be a mapping whose
    keys the ``ManifestItem`` constructor accepts.

    Args:
        path: Location of the manifest file, as a string or ``Path``.

    Returns:
        The constructed items, in file order.
    """
    return [ManifestItem(**entry) for entry in read_jsonl(path)]