"""World Archive Mono sample — layer-switching data explorer (all 9 clips)."""
from __future__ import annotations
import json
import shutil
import tempfile
from functools import lru_cache
from pathlib import Path
import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download, hf_hub_url
# Hugging Face dataset repositories: REPO is the one the loaders below download
# from; LEROBOT_REPO is only linked and quoted in the page header.
REPO = "WorldArchive/mono-india-workplace-sample"
LEROBOT_REPO = "WorldArchive/mono-india-workplace-lerobot"
# Copy and outbound links interpolated into the header Markdown.
TAGLINE = "Ground truth from the real economy."
CALENDLY = "https://calendly.com/algorithmsdheeraj/30min"
S3 = "https://ggn-egocentric-data-sample.s3.ap-south-1.amazonaws.com/sample_data_june"
# (clip_id, display label) pairs. The label is what the dropdown shows; the
# clip_id is used to build preview paths and to look rows up in the tables.
CLIPS = [
    ("sample_01_shuttle_tube_packaging", "01 · Shuttle packaging"),
    ("sample_02_industrial_sewing_machine", "02 · Industrial sewing"),
    ("sample_03_heatgun_and_batching", "03 · Heat gun & batching"),
    ("sample_04_garment_ironing_and_packing", "04 · Garment ironing"),
    ("sample_05_commercial_catering", "05 · Commercial catering"),
    ("sample_06_cane_weaving", "06 · Cane weaving"),
    ("sample_07_car_detailing", "07 · Car detailing"),
    ("sample_08_primer_and_painting", "08 · Primer & painting"),
    ("sample_09_denting_and_filing", "09 · Denting & filing"),
]
# Selectable preview layers; each value is also the stem of the preview file
# requested for a clip (``<layer>.mp4``).
LAYERS = ["plain", "skeleton", "boxes"]
@lru_cache(maxsize=1)
def load_segments_df() -> pd.DataFrame:
    """Return the segments table of the sample dataset as a DataFrame.

    Fetches ``data/segments.parquet`` from the ``REPO`` dataset through the
    Hugging Face Hub and reads it with pandas. The function is memoized, so
    after the first successful call every caller receives the same DataFrame
    object; treat it as read-only.
    """
    parquet_file = hf_hub_download(
        repo_id=REPO,
        filename="data/segments.parquet",
        repo_type="dataset",
    )
    segments = pd.read_parquet(parquet_file)
    return segments
@lru_cache(maxsize=1)
def load_clips_df() -> pd.DataFrame:
    """Return the clips table of the sample dataset as a DataFrame.

    Fetches ``data/clips.parquet`` from the ``REPO`` dataset through the
    Hugging Face Hub and reads it with pandas. The function is memoized, so
    after the first successful call every caller receives the same DataFrame
    object; treat it as read-only.
    """
    parquet_file = hf_hub_download(
        repo_id=REPO,
        filename="data/clips.parquet",
        repo_type="dataset",
    )
    clips = pd.read_parquet(parquet_file)
    return clips
def clip_video_path(clip_id: str, layer: str) -> str:
    """Resolve where the preview video for ``clip_id`` / ``layer`` can be read.

    The file ``clips_preview/<clip_id>/<layer>.mp4`` is downloaded from the
    ``REPO`` dataset and mirrored to
    ``<system temp dir>/wa_explorer/<clip_id>/<layer>.mp4``; the mirror is
    refreshed when it is missing or older than the downloaded file.

    Returns:
        The path of the mirrored file, or — if anything in that process raises
        — the Hub URL of the same file, so the caller always gets a string.
    """
    rel = f"clips_preview/{clip_id}/{layer}.mp4"
    try:
        downloaded = Path(hf_hub_download(REPO, rel, repo_type="dataset"))
        mirror = Path(tempfile.gettempdir(), "wa_explorer", clip_id, f"{layer}.mp4")
        mirror.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): presumably the copy exists so the video is served from
        # the temp directory rather than the Hub cache — confirm.
        up_to_date = (
            mirror.exists()
            and mirror.stat().st_mtime >= downloaded.stat().st_mtime
        )
        if not up_to_date:
            shutil.copy2(downloaded, mirror)
        return str(mirror)
    except Exception:
        # Best effort: hand back a remote URL instead of failing the preview.
        return hf_hub_url(REPO, rel, repo_type="dataset")
def clip_metadata(clip_id: str) -> str:
    """Render the clips-table row for ``clip_id`` as a JSON string.

    The first row whose ``clip_id`` column equals the argument is converted
    to a dict and dumped with two-space indentation; values JSON cannot
    encode natively are stringified.

    Returns:
        The JSON text of the row, or a compact JSON object with ``clip_id``
        and ``error`` keys if loading, lookup, or serialization raises
        (including when no row matches).
    """
    try:
        clips = load_clips_df()
        is_match = clips["clip_id"] == clip_id
        record = clips[is_match].iloc[0].to_dict()
        return json.dumps(record, indent=2, default=str)
    except Exception as exc:
        failure = {"clip_id": clip_id, "error": str(exc)}
        return json.dumps(failure)
def clip_segments(clip_id: str) -> str:
    """Summarize the action segments of ``clip_id`` as Markdown.

    Rows of the segments table whose ``video`` column equals
    ``<clip_id>.mp4`` are ordered by ``start_sec`` and each is rendered as a
    bold time range followed by its action and bolded object (``?`` when the
    row has no such field). At most 12 entries are shown; if more exist, a
    trailing note says so.

    Returns:
        The Markdown text (empty when no rows match), or an italic
        "Could not load segments" message if anything raises.
    """
    max_shown = 12
    try:
        segments = load_segments_df()
        for_clip = segments[segments["video"] == f"{clip_id}.mp4"]
        ordered = for_clip.sort_values("start_sec")
        # Every matching row is formatted (not just the ones displayed), so a
        # malformed row anywhere in the clip still surfaces as an error.
        entries = [
            f"**{seg['start_sec']:.1f}s–{seg['end_sec']:.1f}s** · "
            f"{seg.get('action', '?')} **{seg.get('object', '?')}**"
            for _, seg in ordered.iterrows()
        ]
        text = "\n\n".join(entries[:max_shown])
        if len(entries) > max_shown:
            text += "\n\n_…and more in full pack_"
        return text
    except Exception as e:
        return f"_Could not load segments: {e}_"
def update(clip_label: str, layer: str):
    """Compute the three UI outputs for the selected clip and layer.

    Args:
        clip_label: Display label of the chosen clip, as listed in ``CLIPS``.
        layer: Name of the preview layer to show.

    Returns:
        A ``(video, metadata_json, segments_markdown)`` tuple. When the label
        matches no entry in ``CLIPS`` (for example a cleared dropdown passing
        ``None``) or building the outputs raises, ``video`` is ``None`` and
        the other two elements carry the error message.
    """
    # Resolve the label with a default: a bare ``next(...)`` on an exhausted
    # generator raises StopIteration, which would escape the handler instead
    # of producing the error outputs below.
    clip_id = next((cid for cid, label in CLIPS if label == clip_label), None)
    if clip_id is None:
        problem = f"Unknown clip: {clip_label!r}"
        return None, json.dumps({"error": problem}), f"_Preview failed: {problem}_"
    try:
        return clip_video_path(clip_id, layer), clip_metadata(clip_id), clip_segments(clip_id)
    except Exception as e:
        return None, json.dumps({"error": str(e)}), f"_Preview failed: {e}_"
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation — confirm that the video sits between the two rows.
with gr.Blocks(title="World Archive Data Explorer", theme=gr.themes.Soft()) as demo:
    # Header: headline, tagline, dataset links and a LeRobot loading snippet.
    # The literal stays flush-left so its text is passed to Markdown unchanged.
    gr.Markdown(
        f"""
# World Archive
### {TAGLINE}
**Mono Clear** — annotated egocentric manipulation from Indian workplaces (factory, kitchen, repair, craft).
| 9 clips | 218 segments | 8+ layers | LeRobot-ready |
|:-------:|:------------:|:---------:|:-------------:|
[Metadata dataset](https://huggingface.co/datasets/{REPO}) · [LeRobot mirror](https://huggingface.co/datasets/{LEROBOT_REPO}) · [Collection](https://huggingface.co/collections/WorldArchive/physical-ai-india)
[Book a call]({CALENDLY}) · shubham@worldarchive.co · [Full pack ~19 GB]({S3}/index.html) · [worldarchive.co](https://worldarchive.co)
```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
ds = LeRobotDataset("{LEROBOT_REPO}") # 9 episodes · 46k frames
```
"""
    )

    # Selectors: which clip to show and which preview layer to play.
    with gr.Row():
        clip_dd = gr.Dropdown(
            [label for _, label in CLIPS],
            value=CLIPS[0][1],
            label="Clip",
        )
        layer_dd = gr.Radio(
            LAYERS,
            value="plain",
            label="Layer",
            info="plain · hand skeleton · object boxes",
        )

    video = gr.Video(label="Preview (6s)", autoplay=True)

    # Details for the selected clip: raw metadata next to its segment list.
    with gr.Row():
        meta = gr.Code(label="Clip metadata", language="json", scale=1)
        segs = gr.Markdown(label="Action segments", scale=1)

    # Recompute all three outputs whenever either selector changes, and once
    # when the page first loads.
    clip_dd.change(update, [clip_dd, layer_dd], [video, meta, segs])
    layer_dd.change(update, [clip_dd, layer_dd], [video, meta, segs])
    demo.load(update, [clip_dd, layer_dd], [video, meta, segs])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|