#!/usr/bin/env python3
"""
Setup HuggingFace Hub repositories for ARC-AI project.

Creates:
  - arc-ai/embodied-intelligence (model: main project hub)
  - arc-ai/diffusion-policy-physics (model: physics-pretrained policy)
  - arc-ai/act-policy (model: Action Chunking Transformer policy)
  - arc-ai/sim-demonstrations (dataset: robot demo data)
  - arc-ai/sota-benchmarks (dataset: simulation benchmark results)

Usage:
  pip install huggingface_hub
  huggingface-cli login
  python scripts/setup_huggingface.py
"""

import argparse
import json
import logging
from pathlib import Path

logger = logging.getLogger(__name__)


def create_repos(org: str = "arc-ai", dry_run: bool = False) -> list:
    """Create (or verify) the project's public repositories on the Hub.

    Args:
        org: Hub organisation/user namespace that prefixes every repo id.
        dry_run: When true, only log what would be created; the Hub is not
            contacted and ``huggingface_hub`` is not imported.

    Returns:
        The list of repo specifications, one dict per repo with the keys
        ``repo_id``, ``repo_type`` and ``description``. The list is returned
        in full even when individual creations fail.
    """
    # "description" is informational only: it is returned to the caller but
    # is not passed to create_repo below.
    repos = [
        {
            "repo_id": f"{org}/embodied-intelligence",
            "repo_type": "model",
            "description": "ARC-AI Embodied Intelligence - Physics-grounded manipulation policies",
        },
        {
            "repo_id": f"{org}/diffusion-policy-physics",
            "repo_type": "model",
            "description": "Diffusion Policy pretrained on THE WELL physics simulations",
        },
        {
            "repo_id": f"{org}/act-policy",
            "repo_type": "model",
            "description": "Action Chunking Transformer for robotic manipulation",
        },
        {
            "repo_id": f"{org}/sim-demonstrations",
            "repo_type": "dataset",
            "description": "10K expert demonstrations (7-DOF Franka reach task, MuJoCo)",
        },
        {
            "repo_id": f"{org}/sota-benchmarks",
            "repo_type": "dataset",
            "description": "SOTA simulation benchmark results (24 adversarial scenarios)",
        },
    ]

    if dry_run:
        for repo in repos:
            logger.info("[DRY RUN] Would create: %s (%s)", repo["repo_id"], repo["repo_type"])
        return repos

    # Imported lazily so that a dry run works without huggingface_hub installed.
    from huggingface_hub import create_repo

    for repo in repos:
        try:
            create_repo(
                repo_id=repo["repo_id"],
                repo_type=repo["repo_type"],
                exist_ok=True,  # re-running the script must not fail on existing repos
                private=False,
            )
        except Exception as e:
            # Best effort: log the failure and keep going with the remaining repos.
            logger.error("Failed %s: %s", repo["repo_id"], e)
        else:
            logger.info("Created/verified: %s", repo["repo_id"])

    return repos
def upload_existing_artifacts(org: str = "arc-ai", sim_dir: str = None, api=None):
    """Upload existing trained models and results to HuggingFace.

    Files that do not exist locally are skipped with a warning, and a failed
    upload is logged without aborting the remaining uploads.

    Args:
        org: Hub organisation/user namespace that prefixes every repo id.
        sim_dir: Directory holding the artifacts. When empty or ``None`` the
            ``sim`` directory four levels above this file is used.
        api: Optional client object exposing ``upload_file(...)``. When
            ``None`` a default ``huggingface_hub.HfApi()`` is created.

    Returns:
        None.
    """
    if api is None:
        from huggingface_hub import HfApi

        api = HfApi()

    if sim_dir:
        sim_path = Path(sim_dir)
    else:
        sim_path = Path(__file__).parent.parent.parent.parent / "sim"

    # Every entry names its repo_type explicitly. Deriving it from substrings
    # of the repo id sent "sota-benchmarks" (created as a dataset repo) to the
    # model namespace, and could also be confused by the org name.
    uploads = [
        {
            "file": sim_path / "diffusion_policy.pt",
            "repo": f"{org}/diffusion-policy-physics",
            "repo_type": "model",
            "path_in_repo": "checkpoints/diffusion_policy_500k.pt",
        },
        {
            "file": sim_path / "act_policy.pt",
            "repo": f"{org}/act-policy",
            "repo_type": "model",
            "path_in_repo": "checkpoints/act_policy_100k.pt",
        },
        {
            "file": sim_path / "sota_full_results.json",
            "repo": f"{org}/sota-benchmarks",
            "repo_type": "dataset",
            "path_in_repo": "results/sota_full_results.json",
        },
        {
            "file": sim_path / "SOTA_SIMULATION_REPORT.md",
            "repo": f"{org}/sota-benchmarks",
            "repo_type": "dataset",
            "path_in_repo": "REPORT.md",
        },
    ]

    for item in uploads:
        if not item["file"].exists():
            logger.warning(f"Not found: {item['file']}")
            continue

        try:
            api.upload_file(
                path_or_fileobj=str(item["file"]),
                path_in_repo=item["path_in_repo"],
                repo_id=item["repo"],
                repo_type=item["repo_type"],
            )
        except Exception as e:
            # Best effort: one failed upload must not block the others.
            logger.error(f"Upload failed {item['file'].name}: {e}")
        else:
            logger.info(f"Uploaded: {item['file'].name} → {item['repo']}/{item['path_in_repo']}")
def create_model_card(org: str = "arc-ai"):
    """Generate model card for the main model repo.

    Args:
        org: Hub organisation/user namespace substituted into the dataset
            reference and the citation URL.

    Returns:
        The model card as a single Markdown string that starts with a YAML
        front-matter block.
    """
    # Literal braces in the BibTeX entry are doubled because this is an f-string.
    # The usage snippet imports torch explicitly since it calls torch.load and
    # torch.randn.
    card = f"""---
library_name: pytorch
tags:
- robotics
- diffusion-policy
- embodied-ai
- physics-pretraining
- manipulation
license: apache-2.0
datasets:
- {org}/sim-demonstrations
- polymathic-ai/the_well
---

# ARC-AI: Physics-Grounded Diffusion Policy

Embodied intelligence system for autonomous robotic manipulation, pretrained on diverse physics simulations from THE WELL.

## Architecture

- **Backbone**: Physics Temporal Encoder (Transformer, 4 layers, 8 heads)
- **Policy**: Denoising Diffusion Probabilistic Model (DDPM)
- **Parameters**: 1.5M (Diffusion) / 2.2M (ACT)
- **Pretraining**: THE WELL (15TB physics simulations)
- **Fine-tuning**: 10K expert demonstrations (MuJoCo, 7-DOF Franka)

## Benchmark Results

| Metric | Value |
|--------|-------|
| GPU Throughput | 8.97M samples/sec (131K parallel envs) |
| Training Speed | 64 steps/sec on A100 |
| Diffusion Loss | 0.046 (500K steps) |
| Adversarial Scenarios | 24 tested |
| Physics Stability | 10M steps, zero failures |

## Usage

```python
import torch
from arc_ai.policy import PhysicsDiffusionPolicy, PhysicsPretrainConfig

config = PhysicsPretrainConfig(hidden_dim=256, n_layers=4)
policy = PhysicsDiffusionPolicy(obs_dim=20, action_dim=7, config=config)
policy.load_state_dict(torch.load("checkpoints/diffusion_policy_500k.pt"))

# Inference
obs = torch.randn(1, 4, 20)  # (batch, context_frames, obs_dim)
actions = policy.predict(obs, n_inference_steps=10)
# actions shape: (1, 16, 7) — 16-step action chunk
```

## Pretraining on THE WELL

```python
from arc_ai.training import PhysicsPretrainer, PhysicsPretrainConfig

config = PhysicsPretrainConfig(
    datasets=["polymathic-ai/gray_scott", "polymathic-ai/rayleigh_benard"],
    streaming=True,
    num_steps=200000,
)
pretrainer = PhysicsPretrainer(config)
encoder = pretrainer.train()
```

## Citation

```bibtex
@software{{arc_ai_2026,
  title={{ARC-AI: Physics-Grounded Embodied Intelligence}},
  year={{2026}},
  url={{https://huggingface.co/{org}/embodied-intelligence}}
}}
```
"""
    return card