# video_eval / make_video_extend_json.py
# Youngsun Lim
# videoscore test
# fca9b58
#!/usr/bin/env python
import json
from pathlib import Path
from urllib.parse import quote

from huggingface_hub import HfApi
# ===== Configuration =====
# Hugging Face dataset repository to scan, the revision to read from, and the
# name of the JSON file that main() writes.
REPO_ID = "dghadiya/video_eval_extend"
BRANCH = "main"
OUTPUT_JSON = "videos_extend.json"

# Exact names of the 13 action classes to keep. main() compares these against
# the second component of each repo path (model/action/file), so spelling and
# capitalisation must match the directory names.
# NOTE (from the original author): the list was written as a placeholder
# example -- replace it with the 13 classes you actually use.
ACTIONS_13 = [
    "Bowling",
    "CleanAndJerk",
    "GolfSwing",
    "HammerThrow",
    "Hammering",
    "HandStandPushups",
    "JugglingBalls",
    "JumpRope",
    "Lunges",
    "PlayingGuitar",
    "RockClimbingIndoor",
    "RopeClimbing",
    "Surfing",
]

# Hub client shared by main() for listing the repository contents.
api = HfApi()
def _collect_entries(files, repo_id, branch, actions):
    """Return the sorted JSON entries for every usable video path in *files*.

    A path is kept when it ends in ``.mp4`` (case-insensitive), has at least
    three ``/``-separated components (``model/action/file``), and its second
    component is one of *actions*. Duplicate paths are emitted once.

    Args:
        files: Iterable of repository-relative paths.
        repo_id: Dataset repository id, e.g. ``"user/name"``.
        branch: Revision the download URLs should resolve against.
        actions: Iterable of action names to keep.

    Returns:
        A list of ``{"url", "id", "action"}`` dicts sorted by action, then id.
    """
    wanted = frozenset(actions)  # O(1) membership instead of scanning a list
    # Percent-encode the revision and the file path so that names containing
    # spaces, '#', '?' and similar characters still produce a valid URL.
    # This mirrors the quoting applied by huggingface_hub.hf_hub_url.
    revision = quote(branch, safe="")
    base_url = f"https://huggingface.co/datasets/{repo_id}/resolve/{revision}"

    entries = []
    seen = set()
    for path in files:
        # Only mp4 files are used.
        if not path.lower().endswith(".mp4"):
            continue
        # Expected layout, e.g.:
        #   Wan2.2/Bowling/v_Bowling_g01_c06.mp4
        #   RunwayGen4/BodyWeightSquats/xxx.mp4
        parts = path.split("/")
        if len(parts) < 3:
            # Not a model/action/file path -> skip.
            continue
        action_name = parts[1]
        # Keep only the requested actions, and never the same path twice.
        if action_name not in wanted or path in seen:
            continue
        seen.add(path)
        entries.append(
            {
                "url": f"{base_url}/{quote(path)}",
                "id": path,
                "action": action_name,
            }
        )

    # Sort by action name, then by id, so the output is easy to read.
    entries.sort(key=lambda e: (e["action"], e["id"]))
    return entries


def main(*, files=None, repo_id=None, branch=None, actions=None, output_json=None):
    """List the dataset repo, pick the wanted videos and write them as JSON.

    Called with no arguments this behaves like the original script: it uses
    the module-level ``REPO_ID``, ``BRANCH``, ``ACTIONS_13`` and
    ``OUTPUT_JSON`` settings and lists the repository through ``api``.

    Args:
        files: Optional pre-computed iterable of repository paths. When given,
            the Hub is not contacted.
        repo_id: Optional override for ``REPO_ID``.
        branch: Optional override for ``BRANCH``.
        actions: Optional override for ``ACTIONS_13``.
        output_json: Optional override for ``OUTPUT_JSON``.

    Returns:
        None. The entries are written to the output file as a JSON array.
    """
    repo_id = REPO_ID if repo_id is None else repo_id
    branch = BRANCH if branch is None else branch
    actions = ACTIONS_13 if actions is None else list(actions)
    out_path = Path(OUTPUT_JSON if output_json is None else output_json)

    if files is None:
        print(f"Listing files in HF dataset: {repo_id} @ {branch}")
        files = api.list_repo_files(
            repo_id=repo_id,
            repo_type="dataset",
            revision=branch,
        )
    else:
        files = list(files)
    print(f"Total files in repo: {len(files)}")

    entries = _collect_entries(files, repo_id, branch, actions)

    # Report how many of the requested actions actually matched something; a
    # misspelled action name would otherwise silently yield no videos.
    found = {entry["action"] for entry in entries}
    print(
        f"Collected {len(entries)} videos across "
        f"{len(found)} of {len(actions)} actions."
    )
    missing = [name for name in actions if name not in found]
    if missing:
        print(f"WARNING: no videos found for: {', '.join(missing)}")

    out_path.write_text(json.dumps(entries, indent=2), encoding="utf-8")
    print(f"Saved to {out_path.resolve()}")
if __name__ == "__main__":
    # Per the original author, the HF_TOKEN environment variable is expected
    # to be exported before running this script:
    #   export HF_TOKEN=...
    # NOTE(review): presumably only needed for private or gated repos -- confirm.
    main()