Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| import json | |
| from pathlib import Path | |
| from huggingface_hub import HfApi | |
# ===== Configuration =====

# Hugging Face repository to index and the revision (branch) to read from.
REPO_ID = "dghadiya/video_eval_extend"
BRANCH = "main"

# File name of the JSON manifest this script writes.
OUTPUT_JSON = "videos_extend.json"

# Exact names of the 13 action classes to keep; files whose action folder
# is not listed here are ignored.
# The names below are only an example -- replace them with the 13 actions
# that are actually in use.
ACTIONS_13 = [
    "Bowling",
    "CleanAndJerk",
    "GolfSwing",
    "HammerThrow",
    "Hammering",
    "HandStandPushups",
    "JugglingBalls",
    "JumpRope",
    "Lunges",
    "PlayingGuitar",
    "RockClimbingIndoor",
    "RopeClimbing",
    "Surfing",
]
# Shared Hub client used by main() to list the repository contents.
api = HfApi()


def _collect_entries(files, repo_id, branch, actions):
    """Build the sorted manifest entries for the allow-listed action videos.

    Args:
        files: Iterable of repository-relative file paths (strings), expected
            to look like ``<model>/<action>/<file>.mp4``, for example
            ``Wan2.2/Bowling/v_Bowling_g01_c06.mp4``.
        repo_id: Repository id used to build each download URL.
        branch: Revision name used to build each download URL.
        actions: Iterable of action folder names to keep.

    Returns:
        A list of dicts with the keys ``"url"``, ``"id"`` (the unmodified
        repository path) and ``"action"``, sorted by action and then by id.
    """
    from urllib.parse import quote

    # A set gives constant-time membership tests inside the loop.
    allowed = frozenset(actions)

    # Percent-encode the revision completely (a revision such as
    # "refs/pr/1" contains slashes) and each path with "/" kept as the
    # separator, so names containing spaces, "#", "?" or non-ASCII
    # characters still produce a valid URL.
    base_url = (
        f"https://huggingface.co/datasets/{repo_id}"
        f"/resolve/{quote(branch, safe='')}"
    )

    entries = []
    seen = set()
    for path in files:
        # Only .mp4 files are indexed (extension matched case-insensitively).
        if not path.lower().endswith(".mp4"):
            continue

        # Skip anything that does not have at least model/action/file parts.
        parts = path.split("/")
        if len(parts) < 3:
            continue

        # The second path component is the action folder.
        action_name = parts[1]
        if action_name not in allowed:
            continue

        # Guard against the same path being listed more than once.
        if path in seen:
            continue
        seen.add(path)

        entries.append(
            {
                "url": f"{base_url}/{quote(path)}",
                "id": path,
                "action": action_name,
            }
        )

    # Sort by action name, then by id, so the manifest is easy to read.
    entries.sort(key=lambda e: (e["action"], e["id"]))
    return entries


def main():
    """List the repository files and write the JSON manifest of action videos.

    Lists every file of ``REPO_ID`` at ``BRANCH`` (as a dataset repository),
    keeps the ``.mp4`` files whose action folder is in ``ACTIONS_13``, and
    writes the resulting entries to ``OUTPUT_JSON`` as indented JSON.
    Progress and a summary are printed to stdout.
    """
    print(f"Listing files in HF dataset: {REPO_ID} @ {BRANCH}")
    files = api.list_repo_files(
        repo_id=REPO_ID,
        repo_type="dataset",
        revision=BRANCH,
    )
    print(f"Total files in repo: {len(files)}")

    entries = _collect_entries(files, REPO_ID, BRANCH, ACTIONS_13)

    # Report the number of actions actually found, not the size of the
    # allow-list, so a mistyped action name is visible in the summary.
    found_actions = {entry["action"] for entry in entries}
    print(f"Collected {len(entries)} videos across {len(found_actions)} actions.")

    missing_actions = [name for name in ACTIONS_13 if name not in found_actions]
    if missing_actions:
        print(f"WARNING: no videos found for actions: {', '.join(missing_actions)}")

    out_path = Path(OUTPUT_JSON)
    out_path.write_text(json.dumps(entries, indent=2), encoding="utf-8")
    print(f"Saved to {out_path.resolve()}")
if __name__ == "__main__":
    # The HF_TOKEN environment variable must already be set before running:
    #   export HF_TOKEN=...
    # NOTE(review): presumably the Hub client picks this token up from the
    # environment; nothing in this script reads it explicitly -- confirm.
    main()