Spaces:
Sleeping
Sleeping
File size: 2,369 Bytes
#!/usr/bin/env python
import json
from pathlib import Path
from urllib.parse import quote

from huggingface_hub import HfApi
# ===== Settings =====
# Hugging Face dataset repository to index, and the revision it is read at.
REPO_ID = "dghadiya/video_eval_extend"
BRANCH = "main"
# Write the exact names of the 13 action classes here.
# The entries below are only placeholders; replace them with the 13 you actually use!!
# Action names to keep: a file is indexed only when the second component of
# its repo path (<model>/<action>/...) appears in this list.
ACTIONS_13 = [
    "Bowling",
    "CleanAndJerk",
    "GolfSwing",
    "HammerThrow",
    "Hammering",
    "HandStandPushups",
    "JugglingBalls",
    "JumpRope",
    "Lunges",
    "PlayingGuitar",
    "RockClimbingIndoor",
    "RopeClimbing",
    "Surfing",
]
# Name of the JSON index file written by main().
OUTPUT_JSON = "videos_extend.json"
# Hub API client; main() uses it to list the files in the dataset repo.
api = HfApi()
def _collect_entries(files, repo_id, branch, actions):
    """Build the sorted list of video records for the allowed actions.

    Args:
        files: Iterable of repo-relative, ``/``-separated file paths.
        repo_id: Dataset repository id placed in each download URL.
        branch: Revision name placed in each download URL.
        actions: Collection of action names to keep.

    Returns:
        A list of ``{"url": ..., "id": ..., "action": ...}`` dicts, one per
        unique ``.mp4`` path that has at least ``<model>/<action>/<file>``
        components and whose action is in ``actions``, sorted by
        ``(action, id)``.
    """
    # Build the lookup set once instead of scanning a list for every file.
    allowed = frozenset(actions)

    # Percent-encode the URL parts so that spaces, "#", "?" and similar
    # characters cannot break the link. The revision is encoded in full
    # (a "/" in a branch name must not read as a path separator), while the
    # file path keeps its "/" separators.
    revision = quote(branch, safe="")
    base_url = f"https://huggingface.co/datasets/{repo_id}/resolve/{revision}"

    entries = []
    seen = set()
    for path in files:
        # Only mp4 files are indexed (extension match is case-insensitive).
        if not path.lower().endswith(".mp4"):
            continue

        # Expected layout, for example:
        #   Wan2.2/Bowling/v_Bowling_g01_c06.mp4
        #   RunwayGen4/BodyWeightSquats/xxx.mp4
        parts = path.split("/")
        if len(parts) < 3:
            # Not a model/action/file layout: skip.
            continue

        # Keep only the configured actions.
        action_name = parts[1]
        if action_name not in allowed:
            continue

        # Guard against the same path being listed more than once.
        if path in seen:
            continue
        seen.add(path)

        entries.append(
            {
                "url": f"{base_url}/{quote(path)}",
                "id": path,
                "action": action_name,
            }
        )

    # Sort by action name, then by id, purely for readability of the output.
    entries.sort(key=lambda e: (e["action"], e["id"]))
    return entries


def main():
    """List the dataset repo's files and write the filtered video index.

    Lists every file of ``REPO_ID`` at ``BRANCH`` through the module-level
    ``api`` client, keeps the ``.mp4`` files belonging to ``ACTIONS_13``, and
    writes the resulting records to ``OUTPUT_JSON`` as indented JSON.
    Progress is reported on stdout.
    """
    print(f"Listing files in HF dataset: {REPO_ID} @ {BRANCH}")
    files = api.list_repo_files(
        repo_id=REPO_ID,
        repo_type="dataset",
        revision=BRANCH,
    )
    print(f"Total files in repo: {len(files)}")

    entries = _collect_entries(files, REPO_ID, BRANCH, ACTIONS_13)
    print(f"Collected {len(entries)} videos across {len(ACTIONS_13)} actions.")

    # The summary above counts configured actions, not matched ones, so call
    # out any configured action that ended up with no videos at all.
    found = {entry["action"] for entry in entries}
    missing = [name for name in ACTIONS_13 if name not in found]
    if missing:
        print(f"Warning: no videos found for: {', '.join(missing)}")

    out_path = Path(OUTPUT_JSON)
    out_path.write_text(json.dumps(entries, indent=2), encoding="utf-8")
    print(f"Saved to {out_path.resolve()}")
if __name__ == "__main__":
    # The HF_TOKEN environment variable must already be set, e.g.:
    #   export HF_TOKEN=...
    main()
|