Spaces:
Runtime error
Runtime error
| """World Archive Mono sample — layer-switching data explorer (all 9 clips).""" | |
| from __future__ import annotations | |
| import json | |
| import shutil | |
| import tempfile | |
| from functools import lru_cache | |
| from pathlib import Path | |
| import gradio as gr | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download, hf_hub_url | |
# --- Hugging Face dataset repositories ---------------------------------------
# REPO holds the parquet tables and preview videos this app downloads;
# LEROBOT_REPO is only linked to from the page header.
REPO = "WorldArchive/mono-india-workplace-sample"
LEROBOT_REPO = "WorldArchive/mono-india-workplace-lerobot"

# --- Copy and outbound links interpolated into the page header ----------------
TAGLINE = "Ground truth from the real economy."
CALENDLY = "https://calendly.com/algorithmsdheeraj/30min"
S3 = "https://ggn-egocentric-data-sample.s3.ap-south-1.amazonaws.com/sample_data_june"

# --- Preview catalogue --------------------------------------------------------
# One (clip_id, display label) pair per clip. The label is what the dropdown
# shows; the clip_id is used to build repo paths and to filter the tables.
CLIPS = [
    ("sample_01_shuttle_tube_packaging", "01 · Shuttle packaging"),
    ("sample_02_industrial_sewing_machine", "02 · Industrial sewing"),
    ("sample_03_heatgun_and_batching", "03 · Heat gun & batching"),
    ("sample_04_garment_ironing_and_packing", "04 · Garment ironing"),
    ("sample_05_commercial_catering", "05 · Commercial catering"),
    ("sample_06_cane_weaving", "06 · Cane weaving"),
    ("sample_07_car_detailing", "07 · Car detailing"),
    ("sample_08_primer_and_painting", "08 · Primer & painting"),
    ("sample_09_denting_and_filing", "09 · Denting & filing"),
]

# Selectable renderings of each clip; each name is the stem of the .mp4 file
# requested under clips_preview/<clip_id>/.
LAYERS = ["plain", "skeleton", "boxes"]
@lru_cache(maxsize=1)
def load_segments_df() -> pd.DataFrame:
    """Return the segment table from the sample dataset, loaded once per process.

    The table is read from ``data/segments.parquet`` in ``REPO``. The result is
    memoised so that UI events do not repeat the Hub lookup and the parquet
    parse on every clip or layer change. A failed load raises and is not
    cached, so the next call retries.

    Returns:
        The parsed segment table. The same DataFrame object is handed to every
        caller, so treat it as read-only (filter or copy, never mutate in place).
    """
    path = hf_hub_download(REPO, "data/segments.parquet", repo_type="dataset")
    return pd.read_parquet(path)
@lru_cache(maxsize=1)
def load_clips_df() -> pd.DataFrame:
    """Return the per-clip metadata table from the sample dataset, loaded once per process.

    The table is read from ``data/clips.parquet`` in ``REPO``. The result is
    memoised so that UI events do not repeat the Hub lookup and the parquet
    parse on every clip or layer change. A failed load raises and is not
    cached, so the next call retries.

    Returns:
        The parsed clip table. The same DataFrame object is handed to every
        caller, so treat it as read-only (filter or copy, never mutate in place).
    """
    path = hf_hub_download(REPO, "data/clips.parquet", repo_type="dataset")
    return pd.read_parquet(path)
def clip_video_path(clip_id: str, layer: str) -> str:
    """Resolve a preview video for one clip/layer to something the player can load.

    Args:
        clip_id: Clip identifier used as the folder name under ``clips_preview/``.
        layer: Rendering name; ``<layer>.mp4`` is the file requested.

    Returns:
        The path of a copy of the video under ``<tempdir>/wa_explorer/<clip_id>/``,
        or, if anything in the download/copy step fails, the direct Hub URL
        for the same file.
    """
    filename = f"{layer}.mp4"
    repo_file = f"clips_preview/{clip_id}/{filename}"
    try:
        cached = Path(hf_hub_download(REPO, repo_file, repo_type="dataset"))
        # NOTE(review): presumably copied into the temp dir so the web UI is
        # allowed to serve it — confirm against the Gradio file-access rules.
        local_copy = Path(tempfile.gettempdir(), "wa_explorer", clip_id, filename)
        local_copy.parent.mkdir(parents=True, exist_ok=True)
        # Re-copy only when the temp copy is missing or older than the download.
        up_to_date = (
            local_copy.exists()
            and local_copy.stat().st_mtime >= cached.stat().st_mtime
        )
        if not up_to_date:
            shutil.copy2(cached, local_copy)
        return str(local_copy)
    except Exception:
        # Best effort: let the player stream straight from the Hub instead.
        return hf_hub_url(REPO, repo_file, repo_type="dataset")
def clip_metadata(clip_id: str) -> str:
    """Render the metadata row for ``clip_id`` as a JSON string.

    Args:
        clip_id: Value to match against the ``clip_id`` column of the clip table.

    Returns:
        Indented JSON for the first matching row (values JSON cannot encode
        natively are stringified). If loading, matching, or encoding fails, a
        compact JSON object with ``clip_id`` and ``error`` keys is returned
        instead of raising.
    """
    try:
        table = load_clips_df()
        matching = table.loc[table["clip_id"] == clip_id]
        # iloc[0] raises when nothing matched; that is reported via the handler.
        record = matching.iloc[0].to_dict()
        return json.dumps(record, indent=2, default=str)
    except Exception as err:
        failure = {"clip_id": clip_id, "error": str(err)}
        return json.dumps(failure)
def clip_segments(clip_id: str) -> str:
    """Render the first action segments of a clip as Markdown.

    Segments are the rows of the segment table whose ``video`` column equals
    ``<clip_id>.mp4``, ordered by ``start_sec``. At most 12 are listed; when
    more exist, a trailing note says so.

    Args:
        clip_id: Clip identifier; ``.mp4`` is appended to match the ``video`` column.

    Returns:
        Markdown with one paragraph per segment (time range, action, object).
        If the clip has no segments, or the table cannot be loaded or
        formatted, a single italic status line is returned instead, so the
        pane is never left blank.
    """
    max_shown = 12
    try:
        df = load_segments_df()
        rows = df[df["video"] == f"{clip_id}.mp4"].sort_values("start_sec")
        if rows.empty:
            return "_No action segments found for this clip._"
        # Only format the rows that are actually displayed.
        lines = [
            f"**{r['start_sec']:.1f}s–{r['end_sec']:.1f}s** · "
            f"{r.get('action', '?')} **{r.get('object', '?')}**"
            for _, r in rows.head(max_shown).iterrows()
        ]
        body = "\n\n".join(lines)
        if len(rows) > max_shown:
            body += "\n\n_…and more in full pack_"
        return body
    except Exception as e:
        return f"_Could not load segments: {e}_"
def update(clip_label: str, layer: str):
    """Event handler: refresh the video, metadata and segment panes.

    Args:
        clip_label: Display label chosen in the clip dropdown; mapped back to
            its clip id through ``CLIPS``.
        layer: Rendering name chosen in the layer selector.

    Returns:
        A 3-tuple ``(video, metadata_json, segments_markdown)``. On any
        failure, including a label that is not in ``CLIPS`` (for example an
        empty selection), the tuple is ``(None, <JSON error object>,
        <italic error line>)`` rather than an exception.
    """
    try:
        # The lookup lives inside the guard so an unknown label is reported in
        # the UI rather than escaping the handler as StopIteration.
        clip_id = next((c for c, label in CLIPS if label == clip_label), None)
        if clip_id is None:
            raise ValueError(f"unknown clip label: {clip_label!r}")
        return clip_video_path(clip_id, layer), clip_metadata(clip_id), clip_segments(clip_id)
    except Exception as e:
        return None, json.dumps({"error": str(e)}), f"_Preview failed: {e}_"
# Page layout and event wiring. Components render in the order they are
# created, so statement order here is the visual order of the page.
# NOTE(review): block nesting was reconstructed from a listing that had lost
# its indentation — confirm the Row groupings against the deployed app.
with gr.Blocks(title="World Archive Data Explorer", theme=gr.themes.Soft()) as demo:
    # Static header. The f-string interpolates the module-level constants; the
    # clip/segment/frame counts in the text are hard-coded, not computed.
    gr.Markdown(
        f"""
# World Archive
### {TAGLINE}
**Mono Clear** — annotated egocentric manipulation from Indian workplaces (factory, kitchen, repair, craft).
| 9 clips | 218 segments | 8+ layers | LeRobot-ready |
|:-------:|:------------:|:---------:|:-------------:|
[Metadata dataset](https://huggingface.co/datasets/{REPO}) · [LeRobot mirror](https://huggingface.co/datasets/{LEROBOT_REPO}) · [Collection](https://huggingface.co/collections/WorldArchive/physical-ai-india)
[Book a call]({CALENDLY}) · shubham@worldarchive.co · [Full pack ~19 GB]({S3}/index.html) · [worldarchive.co](https://worldarchive.co)
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
ds = LeRobotDataset("{LEROBOT_REPO}") # 9 episodes · 46k frames
```
"""
    )
    # Selectors: the dropdown offers the display labels from CLIPS (first one
    # preselected); the radio offers the entries of LAYERS.
    with gr.Row():
        clip_dd = gr.Dropdown([l for _, l in CLIPS], value=CLIPS[0][1], label="Clip")
        layer_dd = gr.Radio(LAYERS, value="plain", label="Layer", info="plain · hand skeleton · object boxes")
    # Preview player; its value is the first element returned by update().
    video = gr.Video(label="Preview (6s)", autoplay=True)
    # Detail panes: metadata JSON and segment Markdown (second and third
    # elements returned by update()).
    with gr.Row():
        meta = gr.Code(label="Clip metadata", language="json", scale=1)
        segs = gr.Markdown(label="Action segments", scale=1)
    # Changing either selector, or loading the page, calls update() with the
    # current selections and writes its result into all three outputs.
    clip_dd.change(update, [clip_dd, layer_dd], [video, meta, segs])
    layer_dd.change(update, [clip_dd, layer_dd], [video, meta, segs])
    demo.load(update, [clip_dd, layer_dd], [video, meta, segs])
# Start the Gradio server only when this file is executed as a script, so that
# importing the module builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()