"""World Archive Mono sample — layer-switching data explorer (all 9 clips)."""
from __future__ import annotations

import json
import shutil
import tempfile
from functools import lru_cache
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download, hf_hub_url

# Hugging Face dataset repo id; the parquet tables and the per-layer preview
# videos are downloaded from it with repo_type="dataset".
REPO = "WorldArchive/mono-india-workplace-sample"
# Second dataset repo id; only interpolated into the header text (link and
# code snippet), never downloaded from in this file.
LEROBOT_REPO = "WorldArchive/mono-india-workplace-lerobot"
# Subtitle line rendered under the page heading.
TAGLINE = "Ground truth from the real economy."
# Target of the "Book a call" link in the header.
CALENDLY = "https://calendly.com/algorithmsdheeraj/30min"
# Base URL of the full data pack; the header links to "<S3>/index.html".
S3 = "https://ggn-egocentric-data-sample.s3.ap-south-1.amazonaws.com/sample_data_june"

# (clip_id, display label)
# clip_id is used to build repo paths and to match parquet rows; the display
# label is what the clip dropdown shows and passes back to `update`.
CLIPS = [
    ("sample_01_shuttle_tube_packaging", "01 · Shuttle packaging"),
    ("sample_02_industrial_sewing_machine", "02 · Industrial sewing"),
    ("sample_03_heatgun_and_batching", "03 · Heat gun & batching"),
    ("sample_04_garment_ironing_and_packing", "04 · Garment ironing"),
    ("sample_05_commercial_catering", "05 · Commercial catering"),
    ("sample_06_cane_weaving", "06 · Cane weaving"),
    ("sample_07_car_detailing", "07 · Car detailing"),
    ("sample_08_primer_and_painting", "08 · Primer & painting"),
    ("sample_09_denting_and_filing", "09 · Denting & filing"),
]
# Preview variants offered by the layer selector; each one is fetched as
# "clips_preview/<clip_id>/<layer>.mp4".
LAYERS = ["plain", "skeleton", "boxes"]


@lru_cache(maxsize=1)
def load_segments_df() -> pd.DataFrame:
    """Fetch ``data/segments.parquet`` from the dataset repo and parse it.

    The result is memoised by ``lru_cache``: after the first successful call
    the same DataFrame object is returned without touching the Hub again.
    """
    parquet_file = hf_hub_download(
        repo_id=REPO,
        filename="data/segments.parquet",
        repo_type="dataset",
    )
    return pd.read_parquet(parquet_file)


@lru_cache(maxsize=1)
def load_clips_df() -> pd.DataFrame:
    """Fetch ``data/clips.parquet`` from the dataset repo and parse it.

    The result is memoised by ``lru_cache``: after the first successful call
    the same DataFrame object is returned without touching the Hub again.
    """
    parquet_file = hf_hub_download(
        repo_id=REPO,
        filename="data/clips.parquet",
        repo_type="dataset",
    )
    return pd.read_parquet(parquet_file)


def clip_video_path(clip_id: str, layer: str) -> str:
    """Return a location from which one preview layer of a clip can be played.

    The file ``clips_preview/<clip_id>/<layer>.mp4`` is downloaded from the
    dataset repo and mirrored to ``<tmpdir>/wa_explorer/<clip_id>/<layer>.mp4``.
    The mirror is (re)copied only when it is missing or older than the
    downloaded file. If anything in that process raises, the Hub URL of the
    same file is returned instead of a local path.

    Args:
        clip_id: Clip identifier used as the directory name in the repo.
        layer: Layer name used as the file stem (e.g. one of ``LAYERS``).

    Returns:
        The local mirror path as a string, or the Hub URL on failure.
    """
    repo_file = f"clips_preview/{clip_id}/{layer}.mp4"
    try:
        cached = Path(hf_hub_download(REPO, repo_file, repo_type="dataset"))
        # NOTE(review): the copy into the temp dir is presumably so the video
        # component is handed a plain file in a servable location — confirm.
        mirror = Path(tempfile.gettempdir()).joinpath(
            "wa_explorer", clip_id, f"{layer}.mp4"
        )
        mirror.parent.mkdir(parents=True, exist_ok=True)
        up_to_date = (
            mirror.exists() and mirror.stat().st_mtime >= cached.stat().st_mtime
        )
        if not up_to_date:
            # copy2 carries the source mtime over, which keeps the freshness
            # comparison above meaningful on the next call.
            shutil.copy2(cached, mirror)
        return str(mirror)
    except Exception:
        # Best-effort fallback: hand back the remote URL rather than failing.
        return hf_hub_url(REPO, repo_file, repo_type="dataset")


def clip_metadata(clip_id: str) -> str:
    """Return the ``clips.parquet`` row for *clip_id* as a JSON string.

    The first row whose ``clip_id`` column equals *clip_id* is converted to a
    dict and pretty-printed with two-space indentation; values that ``json``
    cannot serialise natively are passed through ``str``.

    This function never raises. When the clip is not present in the table, or
    when loading/serialising fails, it returns a compact JSON object of the
    form ``{"clip_id": ..., "error": ...}`` instead.

    Args:
        clip_id: Clip identifier to look up.

    Returns:
        A JSON document (metadata row or error object) as a string.
    """
    try:
        df = load_clips_df()
        matches = df[df["clip_id"] == clip_id]
        if matches.empty:
            # Report a missing clip explicitly; indexing an empty selection
            # with .iloc[0] would only yield an opaque out-of-bounds message.
            return json.dumps(
                {"clip_id": clip_id, "error": "clip not found in clips.parquet"}
            )
        row = matches.iloc[0].to_dict()
        return json.dumps(row, indent=2, default=str)
    except Exception as e:
        return json.dumps({"clip_id": clip_id, "error": str(e)})


def clip_segments(clip_id: str) -> str:
    """Return a Markdown summary of the action segments of one clip.

    Rows of the segments table whose ``video`` column equals
    ``"<clip_id>.mp4"`` are ordered by ``start_sec``. At most the first 12 are
    rendered, one paragraph each, as
    ``**<start>s–<end>s** · <action> **<object>**``; a missing ``action`` or
    ``object`` column is shown as ``?``. When more than 12 rows match, a
    trailing "…and more in full pack" note is appended.

    This function never raises: a clip without segments yields a short
    "no segments" note, and any loading/formatting error yields a
    "Could not load segments" note containing the error text.

    Args:
        clip_id: Clip identifier (file stem of the video name in the table).

    Returns:
        Markdown text for the segments panel.
    """
    max_shown = 12
    try:
        df = load_segments_df()
        rows = df[df["video"] == f"{clip_id}.mp4"].sort_values("start_sec")
        if rows.empty:
            # Say so explicitly instead of rendering a blank panel.
            return "_No action segments found for this clip._"
        # Only format the rows that are actually displayed, so a malformed
        # value further down the table cannot break the preview.
        lines = [
            f"**{r['start_sec']:.1f}s–{r['end_sec']:.1f}s** · "
            f"{r.get('action', '?')} **{r.get('object', '?')}**"
            for _, r in rows.head(max_shown).iterrows()
        ]
        suffix = "\n\n_…and more in full pack_" if len(rows) > max_shown else ""
        return "\n\n".join(lines) + suffix
    except Exception as e:
        return f"_Could not load segments: {e}_"


def update(clip_label: str, layer: str):
    """Compute the three panel values for the selected clip and layer.

    Used as the Gradio event handler for the clip dropdown, the layer selector
    and the initial page load.

    Args:
        clip_label: Display label of the selected clip (second element of a
            ``CLIPS`` entry).
        layer: Name of the preview layer to show.

    Returns:
        A 3-tuple ``(video, metadata_json, segments_markdown)``. ``video`` is
        the value from ``clip_video_path``; on failure, or when *clip_label*
        matches no entry in ``CLIPS``, it is ``None`` and the other two
        elements carry the error message.
    """
    # Look the label up with a default: a bare next() would raise
    # StopIteration for an unknown or cleared selection, bypassing the
    # error-tuple convention used below.
    clip_id = next((cid for cid, label in CLIPS if label == clip_label), None)
    if clip_id is None:
        msg = f"unknown clip: {clip_label!r}"
        return None, json.dumps({"error": msg}), f"_Preview failed: {msg}_"
    try:
        return clip_video_path(clip_id, layer), clip_metadata(clip_id), clip_segments(clip_id)
    except Exception as e:
        return None, json.dumps({"error": str(e)}), f"_Preview failed: {e}_"


# The UI is assembled at import time and bound to the module-level name
# `demo`, which the __main__ guard at the bottom of the file launches.
with gr.Blocks(title="World Archive Data Explorer", theme=gr.themes.Soft()) as demo:
    # Static header. This is an f-string: TAGLINE, REPO, LEROBOT_REPO,
    # CALENDLY and S3 are interpolated, including inside the code snippet.
    gr.Markdown(
        f"""
# World Archive
### {TAGLINE}

**Mono Clear** — annotated egocentric manipulation from Indian workplaces (factory, kitchen, repair, craft).

| 9 clips | 218 segments | 8+ layers | LeRobot-ready |
|:-------:|:------------:|:---------:|:-------------:|

[Metadata dataset](https://huggingface.co/datasets/{REPO}) · [LeRobot mirror](https://huggingface.co/datasets/{LEROBOT_REPO}) · [Collection](https://huggingface.co/collections/WorldArchive/physical-ai-india)

[Book a call]({CALENDLY}) · shubham@worldarchive.co · [Full pack ~19 GB]({S3}/index.html) · [worldarchive.co](https://worldarchive.co)

```python
from lerobot.datasets.lerobot_dataset import LeRobotDataset
ds = LeRobotDataset("{LEROBOT_REPO}")  # 9 episodes · 46k frames
```
"""
    )
    # Controls: the dropdown lists the display labels from CLIPS (first entry
    # preselected); the radio lists LAYERS with "plain" preselected.
    with gr.Row():
        clip_dd = gr.Dropdown([l for _, l in CLIPS], value=CLIPS[0][1], label="Clip")
        layer_dd = gr.Radio(LAYERS, value="plain", label="Layer", info="plain · hand skeleton · object boxes")
    # Output panels filled by `update`: video, metadata JSON, segments Markdown.
    video = gr.Video(label="Preview (6s)", autoplay=True)
    with gr.Row():
        meta = gr.Code(label="Clip metadata", language="json", scale=1)
        segs = gr.Markdown(label="Action segments", scale=1)
    # Changing either control re-runs `update` with both current values and
    # refreshes all three panels; `demo.load` registers the same call for the
    # app's load event so the panels start populated.
    clip_dd.change(update, [clip_dd, layer_dd], [video, meta, segs])
    layer_dd.change(update, [clip_dd, layer_dd], [video, meta, segs])
    demo.load(update, [clip_dd, layer_dd], [video, meta, segs])

# Start the Gradio server only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    demo.launch()