#!/usr/bin/env python3
"""
Setup HuggingFace Hub repositories for ARC-AI project.

Creates:
  - arc-ai/embodied-intelligence     (model: main project hub)
  - arc-ai/diffusion-policy-physics  (model: physics-pretrained policy)
  - arc-ai/sim-demonstrations        (dataset: robot demo data)

Usage:
  pip install huggingface_hub
  huggingface-cli login
  python scripts/setup_huggingface.py
"""

import argparse
import json
import logging
from pathlib import Path

logger = logging.getLogger(__name__)


def create_repos(org: str = "arc-ai", dry_run: bool = False):
    from huggingface_hub import HfApi, create_repo

    api = HfApi()

    repos = [
        {
            "repo_id": f"{org}/embodied-intelligence",
            "repo_type": "model",
            "description": "ARC-AI Embodied Intelligence - Physics-grounded manipulation policies",
        },
        {
            "repo_id": f"{org}/diffusion-policy-physics",
            "repo_type": "model",
            "description": "Diffusion Policy pretrained on THE WELL physics simulations",
        },
        {
            "repo_id": f"{org}/act-policy",
            "repo_type": "model",
            "description": "Action Chunking Transformer for robotic manipulation",
        },
        {
            "repo_id": f"{org}/sim-demonstrations",
            "repo_type": "dataset",
            "description": "10K expert demonstrations (7-DOF Franka reach task, MuJoCo)",
        },
        {
            "repo_id": f"{org}/sota-benchmarks",
            "repo_type": "dataset",
            "description": "SOTA simulation benchmark results (24 adversarial scenarios)",
        },
    ]

    for repo in repos:
        if dry_run:
            logger.info(f"[DRY RUN] Would create: {repo['repo_id']} ({repo['repo_type']})")
            continue

        try:
            create_repo(
                repo_id=repo["repo_id"],
                repo_type=repo["repo_type"],
                exist_ok=True,
                private=False,
            )
            logger.info(f"Created/verified: {repo['repo_id']}")
        except Exception as e:
            logger.error(f"Failed {repo['repo_id']}: {e}")

    return repos


def upload_existing_artifacts(org: str = "arc-ai", sim_dir: str = None):
    """Upload existing trained models and results to HuggingFace."""
    from huggingface_hub import HfApi

    api = HfApi()
    sim_path = Path(sim_dir) if sim_dir else Path(__file__).parent.parent.parent.parent / "sim"

    uploads = [
        {
            "file": sim_path / "diffusion_policy.pt",
            "repo": f"{org}/diffusion-policy-physics",
            "path_in_repo": "checkpoints/diffusion_policy_500k.pt",
        },
        {
            "file": sim_path / "act_policy.pt",
            "repo": f"{org}/act-policy",
            "path_in_repo": "checkpoints/act_policy_100k.pt",
        },
        {
            "file": sim_path / "sota_full_results.json",
            "repo": f"{org}/sota-benchmarks",
            "path_in_repo": "results/sota_full_results.json",
        },
        {
            "file": sim_path / "SOTA_SIMULATION_REPORT.md",
            "repo": f"{org}/sota-benchmarks",
            "path_in_repo": "REPORT.md",
        },
    ]

    for item in uploads:
        if not item["file"].exists():
            logger.warning(f"Not found: {item['file']}")
            continue
        try:
            api.upload_file(
                path_or_fileobj=str(item["file"]),
                path_in_repo=item["path_in_repo"],
                repo_id=item["repo"],
                repo_type="model" if "policy" in item["repo"] or "benchmarks" in item["repo"] else "dataset",
            )
            logger.info(f"Uploaded: {item['file'].name} → {item['repo']}/{item['path_in_repo']}")
        except Exception as e:
            logger.error(f"Upload failed {item['file'].name}: {e}")


def create_model_card(org: str = "arc-ai"):
    """Generate model card for the main model repo."""
    card = f"""---
library_name: pytorch
tags:
  - robotics
  - diffusion-policy
  - embodied-ai
  - physics-pretraining
  - manipulation
license: apache-2.0
datasets:
  - {org}/sim-demonstrations
  - polymathic-ai/the_well
---

# ARC-AI: Physics-Grounded Diffusion Policy

Embodied intelligence system for autonomous robotic manipulation,
pretrained on diverse physics simulations from THE WELL.

## Architecture

- **Backbone**: Physics Temporal Encoder (Transformer, 4 layers, 8 heads)
- **Policy**: Denoising Diffusion Probabilistic Model (DDPM)
- **Parameters**: 1.5M (Diffusion) / 2.2M (ACT)
- **Pretraining**: THE WELL (15TB physics simulations)
- **Fine-tuning**: 10K expert demonstrations (MuJoCo, 7-DOF Franka)

## Benchmark Results

| Metric | Value |
|--------|-------|
| GPU Throughput | 8.97M samples/sec (131K parallel envs) |
| Training Speed | 64 steps/sec on A100 |
| Diffusion Loss | 0.046 (500K steps) |
| Adversarial Scenarios | 24 tested |
| Physics Stability | 10M steps, zero failures |

## Usage

```python
from arc_ai.policy import PhysicsDiffusionPolicy, PhysicsPretrainConfig

config = PhysicsPretrainConfig(hidden_dim=256, n_layers=4)
policy = PhysicsDiffusionPolicy(obs_dim=20, action_dim=7, config=config)
policy.load_state_dict(torch.load("checkpoints/diffusion_policy_500k.pt"))

# Inference
obs = torch.randn(1, 4, 20)  # (batch, context_frames, obs_dim)
actions = policy.predict(obs, n_inference_steps=10)
# actions shape: (1, 16, 7) — 16-step action chunk
```

## Pretraining on THE WELL

```python
from arc_ai.training import PhysicsPretrainer, PhysicsPretrainConfig

config = PhysicsPretrainConfig(
    datasets=["polymathic-ai/gray_scott", "polymathic-ai/rayleigh_benard"],
    streaming=True,
    num_steps=200000,
)
pretrainer = PhysicsPretrainer(config)
encoder = pretrainer.train()
```

## Citation

```bibtex
@software{{arc_ai_2026,
  title={{ARC-AI: Physics-Grounded Embodied Intelligence}},
  year={{2026}},
  url={{https://huggingface.co/{org}/embodied-intelligence}}
}}
```
"""
    return card


def create_dataset_card(org: str = "arc-ai"):
    """Generate dataset card for sim-demonstrations."""
    card = f"""---
task_categories:
  - robotics
tags:
  - manipulation
  - mujoco
  - franka
  - expert-demonstrations
size_categories:
  - 1M<n<10M
license: apache-2.0
---

# ARC-AI Simulation Demonstrations

10,000 expert manipulation trajectories for 7-DOF Franka Panda robot.

## Dataset Details

| Field | Value |
|-------|-------|
| Trajectories | 10,000 |
| Steps per trajectory | 300 |
| Total state-action pairs | 2,840,000 |
| Observation dim | 20 (joints + EE + object) |
| Action dim | 7 (joint positions) |
| Physics engine | MuJoCo |
| Expert algorithm | Jacobian IK (damped least-squares) |
| Task | Reach (EE to object position) |

## Observation Space

| Index | Description |
|-------|-------------|
| 0-6 | Joint positions (rad) |
| 7-13 | Joint velocities (rad/s) |
| 14-16 | End-effector XYZ (m) |
| 17-19 | Object XYZ (m) |

## Usage

```python
from datasets import load_dataset

ds = load_dataset("{org}/sim-demonstrations", streaming=True)
for sample in ds["train"]:
    obs = sample["observations"]   # (300, 20)
    actions = sample["actions"]    # (300, 7)
```
"""
    return card


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    parser = argparse.ArgumentParser(description="Setup HuggingFace repos for ARC-AI")
    parser.add_argument("--org", default="arc-ai", help="HuggingFace org/username")
    parser.add_argument("--dry-run", action="store_true", help="Print actions without executing")
    parser.add_argument("--upload", action="store_true", help="Upload existing artifacts")
    parser.add_argument("--sim-dir", default=None, help="Path to sim/ directory")
    args = parser.parse_args()

    create_repos(org=args.org, dry_run=args.dry_run)

    if args.upload and not args.dry_run:
        upload_existing_artifacts(org=args.org, sim_dir=args.sim_dir)

    logger.info("Done. Run 'huggingface-cli login' first if not authenticated.")