| import csv | |
| from pathlib import Path | |
| import datasets | |
# No citation is associated with this local dataset.
_CITATION = ""

# Human-readable summary, reused by the builder config and the dataset info.
_DESCRIPTION = (
    "Local video dataset with nested 'video' field (id, path, bytes)."
)
class LocalVideoConfig(datasets.BuilderConfig):
    """Builder configuration for the local video dataset.

    Behaves like ``datasets.BuilderConfig`` except that the version defaults
    to ``1.0.0`` when the caller does not supply one.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``datasets.BuilderConfig``.

        Args:
            **kwargs: Keyword arguments accepted by ``datasets.BuilderConfig``
                (for example ``name`` and ``description``). An explicit
                ``version`` overrides the ``1.0.0`` default.
        """
        # Use setdefault instead of passing version= alongside **kwargs:
        # a caller-supplied version would otherwise raise
        # "TypeError: got multiple values for keyword argument 'version'".
        kwargs.setdefault("version", datasets.Version("1.0.0"))
        super().__init__(**kwargs)
class LocalVideoDataset(datasets.GeneratorBasedBuilder):
    """Dataset builder that serves local video files listed in a CSV manifest.

    Every example carries a single nested ``video`` field holding the row's
    ``id``, its ``path`` exactly as written in the manifest, and the raw file
    contents as ``bytes``. Only a ``test`` split is produced.
    """

    BUILDER_CONFIGS = [LocalVideoConfig(name="default", description=_DESCRIPTION)]

    def _info(self):
        """Return the dataset metadata, including the nested feature schema."""
        features = datasets.Features(
            {
                "video": {
                    "id": datasets.Value("string"),
                    "path": datasets.Value("string"),
                    "bytes": datasets.Value("binary"),
                }
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            citation=_CITATION,
            homepage="",
        )

    def _split_generators(self, dl_manager):
        """Locate ``test_manifest.csv`` next to this script and declare the split.

        Args:
            dl_manager: Download manager supplied by the builder machinery;
                not used here because all data is read from local disk.

        Returns:
            A one-element list with the ``test`` split generator.

        Raises:
            FileNotFoundError: If the manifest CSV does not exist.
        """
        # NOTE(review): when the script is loaded through
        # `datasets.load_dataset`, it may be executed from a cached copy, in
        # which case `__file__` would not point at the original directory --
        # confirm the manifest is found in that setup.
        base = Path(__file__).parent.resolve()
        manifest = base / "test_manifest.csv"
        if not manifest.exists():
            raise FileNotFoundError(f"Missing manifest CSV: {manifest}")
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"manifest_path": str(manifest)},
            )
        ]

    def _generate_examples(self, manifest_path):
        """Yield one ``(key, example)`` pair per manifest row.

        The manifest must have ``id`` and ``path`` columns. A relative
        ``path`` is first tried as written (relative to the current working
        directory); if nothing exists there, it is resolved against the
        directory containing the manifest. The ``path`` value stored in the
        example is always the string from the manifest, unmodified.

        Args:
            manifest_path: Filesystem path of the manifest CSV.

        Yields:
            ``(id, {"video": {"id", "path", "bytes"}})`` for every row.

        Raises:
            KeyError: If a row lacks the ``id`` or ``path`` column.
            FileNotFoundError: If a referenced video file cannot be found.
        """
        manifest_dir = Path(manifest_path).resolve().parent
        # utf-8-sig makes decoding independent of the platform locale and
        # strips a leading BOM, which would otherwise turn the first header
        # into "\ufeffid" and break the row["id"] lookup.
        with open(manifest_path, newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            for row in reader:
                vid_id = row["id"]
                path = row["path"]
                source = Path(path)
                # Fall back to the manifest's directory for relative entries
                # so loading does not depend on the working directory.
                if not source.is_absolute() and not source.exists():
                    source = manifest_dir / source
                data = source.read_bytes()
                yield vid_id, {"video": {"id": vid_id, "path": path, "bytes": data}}