| |
| """ |
| Setup HuggingFace Hub repositories for ARC-AI project. |
| |
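| Creates: |
| - arc-ai/embodied-intelligence (model: main project hub) |
| - arc-ai/diffusion-policy-physics (model: physics-pretrained policy) |
| - arc-ai/act-policy (model: Action Chunking Transformer policy) |
| - arc-ai/sim-demonstrations (dataset: robot demo data) |
| - arc-ai/sota-benchmarks (dataset: simulation benchmark results) |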
| |
| Usage: |
| pip install huggingface_hub |
| huggingface-cli login |
| python scripts/setup_huggingface.py |
| """ |
|
|
| import argparse |
| import json |
| import logging |
| from pathlib import Path |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
def create_repos(org: str = "arc-ai", dry_run: bool = False) -> list:
    """Create (or verify) the project's HuggingFace Hub repositories.

    Every repository is created public (``private=False``) with
    ``exist_ok=True``, so re-running against repositories that already exist
    is not treated as an error. A failure on one repository is logged and the
    remaining repositories are still attempted.

    Args:
        org: HuggingFace organisation or user name that owns the repositories.
        dry_run: When true, only log what would be created. No Hub call is
            made and ``huggingface_hub`` is not imported.

    Returns:
        The list of repository specs, one dict per repository with the keys
        ``repo_id``, ``repo_type`` and ``description``. The same list is
        returned in dry-run mode and regardless of individual failures.
    """
    # NOTE: "description" is informational only; it is returned to the caller
    # but is not passed to create_repo() below.
    repos = [
        {
            "repo_id": f"{org}/embodied-intelligence",
            "repo_type": "model",
            "description": "ARC-AI Embodied Intelligence - Physics-grounded manipulation policies",
        },
        {
            "repo_id": f"{org}/diffusion-policy-physics",
            "repo_type": "model",
            "description": "Diffusion Policy pretrained on THE WELL physics simulations",
        },
        {
            "repo_id": f"{org}/act-policy",
            "repo_type": "model",
            "description": "Action Chunking Transformer for robotic manipulation",
        },
        {
            "repo_id": f"{org}/sim-demonstrations",
            "repo_type": "dataset",
            "description": "10K expert demonstrations (7-DOF Franka reach task, MuJoCo)",
        },
        {
            "repo_id": f"{org}/sota-benchmarks",
            "repo_type": "dataset",
            "description": "SOTA simulation benchmark results (24 adversarial scenarios)",
        },
    ]

    if dry_run:
        for repo in repos:
            logger.info("[DRY RUN] Would create: %s (%s)", repo["repo_id"], repo["repo_type"])
        return repos

    # Imported only on the real path so that a dry run works without the
    # huggingface_hub package installed.
    from huggingface_hub import create_repo

    for repo in repos:
        try:
            create_repo(
                repo_id=repo["repo_id"],
                repo_type=repo["repo_type"],
                exist_ok=True,
                private=False,
            )
        except Exception as e:
            # Best effort: report the failure and carry on with the next repo.
            logger.error("Failed %s: %s", repo["repo_id"], e)
        else:
            logger.info("Created/verified: %s", repo["repo_id"])

    return repos
|
|
|
|
def upload_existing_artifacts(org: str = "arc-ai", sim_dir: str = None):
    """Upload existing trained models and results to HuggingFace.

    Files that do not exist locally are skipped with a warning. A failed
    upload is logged and the remaining files are still attempted.

    Args:
        org: HuggingFace organisation or user name that owns the target
            repositories.
        sim_dir: Directory containing the artifacts. When omitted, a ``sim``
            directory is looked up four ``.parent`` steps above this file's
            path.
    """
    from huggingface_hub import HfApi

    api = HfApi()
    # NOTE(review): the default walks four levels up from this file; confirm
    # that this matches where the script actually lives in the repository.
    sim_path = Path(sim_dir) if sim_dir else Path(__file__).parent.parent.parent.parent / "sim"

    # Each entry carries its own repo_type. It must match the type the
    # repository was created with: sota-benchmarks is created as a *dataset*
    # repo by create_repos(), so guessing the type from substrings of the
    # repo id (which also include the org name) targets the wrong repo.
    uploads = [
        {
            "file": sim_path / "diffusion_policy.pt",
            "repo": f"{org}/diffusion-policy-physics",
            "repo_type": "model",
            "path_in_repo": "checkpoints/diffusion_policy_500k.pt",
        },
        {
            "file": sim_path / "act_policy.pt",
            "repo": f"{org}/act-policy",
            "repo_type": "model",
            "path_in_repo": "checkpoints/act_policy_100k.pt",
        },
        {
            "file": sim_path / "sota_full_results.json",
            "repo": f"{org}/sota-benchmarks",
            "repo_type": "dataset",
            "path_in_repo": "results/sota_full_results.json",
        },
        {
            "file": sim_path / "SOTA_SIMULATION_REPORT.md",
            "repo": f"{org}/sota-benchmarks",
            "repo_type": "dataset",
            "path_in_repo": "REPORT.md",
        },
    ]

    for item in uploads:
        if not item["file"].exists():
            logger.warning("Not found: %s", item["file"])
            continue
        try:
            api.upload_file(
                path_or_fileobj=str(item["file"]),
                path_in_repo=item["path_in_repo"],
                repo_id=item["repo"],
                repo_type=item["repo_type"],
            )
        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            logger.error("Upload failed %s: %s", item["file"].name, e)
        else:
            logger.info(
                "Uploaded: %s → %s/%s",
                item["file"].name,
                item["repo"],
                item["path_in_repo"],
            )
|
|
|
|
def create_model_card(org: str = "arc-ai") -> str:
    """Generate model card for the main model repo.

    The card is a Markdown document that starts with a YAML front-matter
    block. ``org`` is interpolated into the linked dataset id and into the
    citation URL.

    Args:
        org: HuggingFace organisation or user name that owns the repositories.

    Returns:
        The full card text, ending with a newline.
    """
    # Literal braces in the BibTeX entry are doubled because this is an
    # f-string. The usage snippet imports torch explicitly so that it can be
    # copied and run as-is.
    card = f"""---
library_name: pytorch
tags:
- robotics
- diffusion-policy
- embodied-ai
- physics-pretraining
- manipulation
license: apache-2.0
datasets:
- {org}/sim-demonstrations
- polymathic-ai/the_well
---

# ARC-AI: Physics-Grounded Diffusion Policy

Embodied intelligence system for autonomous robotic manipulation,
pretrained on diverse physics simulations from THE WELL.

## Architecture

- **Backbone**: Physics Temporal Encoder (Transformer, 4 layers, 8 heads)
- **Policy**: Denoising Diffusion Probabilistic Model (DDPM)
- **Parameters**: 1.5M (Diffusion) / 2.2M (ACT)
- **Pretraining**: THE WELL (15TB physics simulations)
- **Fine-tuning**: 10K expert demonstrations (MuJoCo, 7-DOF Franka)

## Benchmark Results

| Metric | Value |
|--------|-------|
| GPU Throughput | 8.97M samples/sec (131K parallel envs) |
| Training Speed | 64 steps/sec on A100 |
| Diffusion Loss | 0.046 (500K steps) |
| Adversarial Scenarios | 24 tested |
| Physics Stability | 10M steps, zero failures |

## Usage

```python
import torch

from arc_ai.policy import PhysicsDiffusionPolicy, PhysicsPretrainConfig

config = PhysicsPretrainConfig(hidden_dim=256, n_layers=4)
policy = PhysicsDiffusionPolicy(obs_dim=20, action_dim=7, config=config)
policy.load_state_dict(torch.load("checkpoints/diffusion_policy_500k.pt"))

# Inference
obs = torch.randn(1, 4, 20)  # (batch, context_frames, obs_dim)
actions = policy.predict(obs, n_inference_steps=10)
# actions shape: (1, 16, 7) — 16-step action chunk
```

## Pretraining on THE WELL

```python
from arc_ai.training import PhysicsPretrainer, PhysicsPretrainConfig

config = PhysicsPretrainConfig(
    datasets=["polymathic-ai/gray_scott", "polymathic-ai/rayleigh_benard"],
    streaming=True,
    num_steps=200000,
)
pretrainer = PhysicsPretrainer(config)
encoder = pretrainer.train()
```

## Citation

```bibtex
@software{{arc_ai_2026,
  title={{ARC-AI: Physics-Grounded Embodied Intelligence}},
  year={{2026}},
  url={{https://huggingface.co/{org}/embodied-intelligence}}
}}
```
"""
    return card
|
|
|
|
def create_dataset_card(org: str = "arc-ai"):
    """Build the dataset card text for the sim-demonstrations repository.

    The card is Markdown with a leading YAML front-matter block. ``org`` is
    only used in the ``load_dataset`` usage example.

    Returns the card as a single string in which every line, including the
    last, is newline-terminated.
    """
    front_matter = [
        "---",
        "task_categories:",
        "- robotics",
        "tags:",
        "- manipulation",
        "- mujoco",
        "- franka",
        "- expert-demonstrations",
        "size_categories:",
        "- 1M<n<10M",
        "license: apache-2.0",
        "---",
    ]
    overview = [
        "",
        "# ARC-AI Simulation Demonstrations",
        "",
        "10,000 expert manipulation trajectories for 7-DOF Franka Panda robot.",
    ]
    details = [
        "",
        "## Dataset Details",
        "",
        "| Field | Value |",
        "|-------|-------|",
        "| Trajectories | 10,000 |",
        "| Steps per trajectory | 300 |",
        "| Total state-action pairs | 2,840,000 |",
        "| Observation dim | 20 (joints + EE + object) |",
        "| Action dim | 7 (joint positions) |",
        "| Physics engine | MuJoCo |",
        "| Expert algorithm | Jacobian IK (damped least-squares) |",
        "| Task | Reach (EE to object position) |",
    ]
    observation_space = [
        "",
        "## Observation Space",
        "",
        "| Index | Description |",
        "|-------|-------------|",
        "| 0-6 | Joint positions (rad) |",
        "| 7-13 | Joint velocities (rad/s) |",
        "| 14-16 | End-effector XYZ (m) |",
        "| 17-19 | Object XYZ (m) |",
    ]
    usage = [
        "",
        "## Usage",
        "",
        "```python",
        "from datasets import load_dataset",
        "",
        f'ds = load_dataset("{org}/sim-demonstrations", streaming=True)',
        'for sample in ds["train"]:',
        '    obs = sample["observations"]  # (300, 20)',
        '    actions = sample["actions"]  # (300, 7)',
        "```",
    ]

    sections = (front_matter, overview, details, observation_space, usage)
    lines = [line for section in sections for line in section]
    # The trailing newline terminates the closing code fence.
    return "\n".join(lines) + "\n"
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: create/verify the Hub repositories, then
    # optionally upload the locally available artifacts.
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    parser = argparse.ArgumentParser(description="Setup HuggingFace repos for ARC-AI")
    parser.add_argument("--org", default="arc-ai", help="HuggingFace org/username")
    parser.add_argument("--dry-run", action="store_true", help="Print actions without executing")
    parser.add_argument("--upload", action="store_true", help="Upload existing artifacts")
    parser.add_argument("--sim-dir", default=None, help="Path to sim/ directory")
    args = parser.parse_args()

    create_repos(org=args.org, dry_run=args.dry_run)

    if args.upload:
        if args.dry_run:
            # Say so explicitly instead of silently ignoring --upload.
            logger.info("[DRY RUN] Skipping artifact upload")
        else:
            upload_existing_artifacts(org=args.org, sim_dir=args.sim_dir)

    logger.info("Done. Run 'huggingface-cli login' first if not authenticated.")
|
|