# eft_trained/tmp/data/dataset.py
# Uploaded by Qafig: "Upload folder using huggingface_hub"
# Commit 8720f73 (verified)
import csv
from pathlib import Path
import datasets
# Citation string passed to DatasetInfo; empty in this file.
_CITATION = ""
# Human-readable summary used for both the builder config and DatasetInfo.
_DESCRIPTION = "Local video dataset with nested 'video' field (id, path, bytes)."
class LocalVideoConfig(datasets.BuilderConfig):
    """Builder configuration for the local video dataset.

    Defaults the config ``version`` to ``1.0.0`` and forwards every other
    keyword argument to ``datasets.BuilderConfig`` unchanged.
    """

    def __init__(self, **kwargs):
        """Create the config.

        Args:
            **kwargs: Keyword arguments forwarded to
                ``datasets.BuilderConfig.__init__`` (this file passes
                ``name`` and ``description``). A caller-supplied
                ``version`` overrides the ``1.0.0`` default.
        """
        # Supply the default only when the caller did not pass ``version``:
        # giving it both explicitly and through **kwargs raises TypeError
        # ("got multiple values for keyword argument").
        kwargs.setdefault("version", datasets.Version("1.0.0"))
        super().__init__(**kwargs)
class LocalVideoDataset(datasets.GeneratorBasedBuilder):
    """Dataset builder that loads local video files listed in a CSV manifest.

    Exposes a single ``test`` split. Every example has one nested ``video``
    field holding the video's ``id``, its ``path`` exactly as written in the
    manifest, and the file contents as ``bytes``.
    """

    BUILDER_CONFIGS = [LocalVideoConfig(name="default", description=_DESCRIPTION)]

    def _info(self):
        """Return the dataset metadata together with the feature schema."""
        video_features = {
            "id": datasets.Value("string"),
            "path": datasets.Value("string"),
            "bytes": datasets.Value("binary"),
        }
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features({"video": video_features}),
            citation=_CITATION,
            homepage="",
        )

    def _split_generators(self, dl_manager):
        """Locate the manifest next to this script and declare the test split.

        Args:
            dl_manager: Download manager handed in by the builder; not used
                here because the manifest is read from the local filesystem.

        Returns:
            A one-element list with the ``test`` split generator, whose
            ``manifest_path`` keyword is the manifest path as a string.

        Raises:
            FileNotFoundError: If ``test_manifest.csv`` does not exist in the
                directory containing this file.
        """
        # NOTE(review): the manifest is looked up relative to __file__; if the
        # loader runs this script from a copied/cached location the CSV may
        # not be alongside it — confirm how the script is loaded.
        script_dir = Path(__file__).parent.resolve()
        manifest = script_dir / "test_manifest.csv"
        if not manifest.exists():
            raise FileNotFoundError(f"Missing manifest CSV: {manifest}")
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={"manifest_path": str(manifest)},
            )
        ]

    def _generate_examples(self, manifest_path):
        """Yield one ``(key, example)`` pair per manifest row.

        Args:
            manifest_path: Path to a CSV file with at least the columns
                ``id`` and ``path``.

        Yields:
            Tuples of the row's ``id`` and a dict
            ``{"video": {"id", "path", "bytes"}}`` where ``bytes`` is the
            full content of the referenced file.

        Raises:
            KeyError: If a row lacks the ``id`` or ``path`` column.
            OSError: If the manifest or a referenced video cannot be read.
        """
        manifest_dir = Path(manifest_path).parent
        # "utf-8-sig" decodes plain UTF-8 and also strips a leading BOM, which
        # would otherwise become part of the first header name and make
        # row["id"] fail. An explicit encoding also removes the dependence on
        # the platform's locale default.
        with open(manifest_path, newline="", encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                vid_id = row["id"]
                path = row["path"]
                video_file = Path(path)
                # Paths that already resolve (absolute, or relative to the
                # current working directory) are used as before; only when a
                # relative path is not found do we fall back to resolving it
                # against the manifest's own directory.
                if not video_file.is_absolute() and not video_file.exists():
                    video_file = manifest_dir / video_file
                data = video_file.read_bytes()
                # The emitted "path" stays the manifest's original string.
                yield vid_id, {"video": {"id": vid_id, "path": path, "bytes": data}}