File size: 1,646 Bytes
8720f73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import csv
from pathlib import Path
import datasets
# Citation string handed to DatasetInfo; currently empty.
_CITATION = ""
# One-line summary shared by the builder config and DatasetInfo.
_DESCRIPTION = "Local video dataset with nested 'video' field (id, path, bytes)."
class LocalVideoConfig(datasets.BuilderConfig):
    """Builder configuration for the local video dataset.

    Defaults ``version`` to 1.0.0 and forwards every other keyword argument
    to ``datasets.BuilderConfig`` unchanged.
    """

    def __init__(self, **kwargs):
        """Create the configuration.

        Args:
            **kwargs: Keyword arguments accepted by ``datasets.BuilderConfig``
                (this file passes ``name`` and ``description``). An explicit
                ``version`` overrides the 1.0.0 default.
        """
        # setdefault instead of a hard-coded keyword: passing version= both
        # here and via **kwargs would raise "got multiple values for keyword
        # argument 'version'".
        kwargs.setdefault("version", datasets.Version("1.0.0"))
        super().__init__(**kwargs)
class LocalVideoDataset(datasets.GeneratorBasedBuilder):
    """Dataset builder that serves local video files listed in a CSV manifest.

    Only a TEST split is produced. Each example has a single nested ``video``
    field holding the clip's ``id``, its ``path`` exactly as written in the
    manifest, and the raw file contents as ``bytes``.
    """

    BUILDER_CONFIGS = [LocalVideoConfig(name="default", description=_DESCRIPTION)]

    def _info(self):
        """Return the dataset metadata, including the nested ``video`` schema."""
        features = datasets.Features(
            {
                "video": {
                    "id": datasets.Value("string"),
                    "path": datasets.Value("string"),
                    "bytes": datasets.Value("binary"),
                }
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            citation=_CITATION,
            homepage="",
        )

    def _split_generators(self, dl_manager):
        """Locate ``test_manifest.csv`` next to this script and declare the TEST split.

        Args:
            dl_manager: Download manager supplied by ``datasets``; unused
                because all data is read from the local filesystem.

        Returns:
            A one-element list with the TEST ``SplitGenerator``.

        Raises:
            FileNotFoundError: If the manifest CSV is not beside this script.
        """
        # NOTE(review): when `datasets` runs a cached copy of this script,
        # __file__ presumably points into that cache rather than the original
        # directory -- confirm the manifest is reachable in that setup.
        base = Path(__file__).parent.resolve()
        manifest = base / "test_manifest.csv"
        if not manifest.exists():
            raise FileNotFoundError(f"Missing manifest CSV: {manifest}")
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"manifest_path": str(manifest)},
            )
        ]

    def _generate_examples(self, manifest_path):
        """Yield ``(key, example)`` pairs, one per manifest row.

        Args:
            manifest_path: Path to a CSV file with ``id`` and ``path`` columns.

        Yields:
            ``(id, {"video": {"id", "path", "bytes"}})`` where ``path`` is the
            manifest value unchanged and ``bytes`` is the file's full content.

        Raises:
            KeyError: If a row lacks the ``id`` or ``path`` column.
            FileNotFoundError: If a listed video file cannot be found.
        """
        manifest_dir = Path(manifest_path).parent
        # Explicit encoding keeps parsing independent of the platform locale;
        # utf-8-sig also reads plain UTF-8 and strips a leading BOM that would
        # otherwise be glued onto the first header name ("id").
        with open(manifest_path, newline="", encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                vid_id = row["id"]
                path = row["path"]
                video_file = Path(path)
                # Relative entries are tried against the working directory
                # first (the previous behaviour); only if that misses do we
                # fall back to the manifest's own directory.
                if not video_file.is_absolute() and not video_file.exists():
                    video_file = manifest_dir / video_file
                data = video_file.read_bytes()
                yield vid_id, {"video": {"id": vid_id, "path": path, "bytes": data}}
|