#!/usr/bin/env python3
"""
run_demo.py — Self-contained dummy demo of upload_to_hub.py
============================================================
Builds a fake HF deployment package WITHOUT requiring torch or a real
checkpoint, so you can see exactly what files get uploaded.

This demo:
  1. Creates a dummy checkpoint, args.json, label stats files
  2. Patches torch import to a stub so upload_to_hub.py can run
  3. Calls package_model() in dry-run mode
  4. Lists every file in the package with its purpose

Run:
    python run_demo.py
"""

from __future__ import annotations

import json
import shutil
import sys
import types
from pathlib import Path

import numpy as np


# ── §1  Build a torch stub (so upload_to_hub.py can be imported) ───────────

class _TorchStub:
    """Stand-in for the ``torch`` module.

    An *instance* of this class is placed in ``sys.modules["torch"]``, so
    attribute access such as ``torch.load`` / ``torch.save`` /
    ``torch.Tensor`` resolves to the members defined here.
    """

    class Tensor:
        """Minimal tensor look-alike backed by a NumPy array."""

        def __init__(self, data):
            self._d = np.asarray(data)
            self.shape = self._d.shape

        def numel(self):
            """Return the number of elements (product of ``shape``)."""
            return int(np.prod(self.shape))

        def clone(self):
            """Return ``self``; the stub never copies data."""
            return self

        def contiguous(self):
            """Return ``self``; the stub has no memory layout to fix."""
            return self

        @property
        def dtype(self):
            """Return a dtype stub that always reports floating point."""
            return _DType()

    @staticmethod
    def load(path, **kw):
        """Ignore *path* and return the in-memory dummy checkpoint."""
        return _DUMMY_CKPT

    @staticmethod
    def save(obj, path):
        """Mimic torch.save for the .bin fallback path: write a marker blob."""
        with open(path, "wb") as f:
            f.write(b"DUMMY_TORCH_BIN")


class _DType:
    """Dtype stub returned by ``_TorchStub.Tensor.dtype``."""

    @property
    def is_floating_point(self):
        return True


def _zeros(*shape):
    """Return a float32 all-zeros stub tensor of the given shape."""
    return _TorchStub.Tensor(np.zeros(shape, dtype=np.float32))


# Mock checkpoint structure that mirrors a real DDPM checkpoint
_DUMMY_CKPT = {
    "model_state_dict": {
        "unet.conv.weight": _zeros(64, 1, 3, 3),
        "unet.conv.bias": _zeros(64),
        "unet.label_emb.weight": _zeros(64, 2),
        "unet.label_emb.bias": _zeros(64),
        "unet.out.weight": _zeros(1, 64, 1, 1),
        "unet.out.bias": _zeros(1),
    },
    "ema_shadow": {
        # Differs from model_state_dict only in this first entry (0.01s, not 0s).
        "unet.conv.weight": _TorchStub.Tensor(
            np.ones((64, 1, 3, 3), dtype=np.float32) * 0.01
        ),
        "unet.conv.bias": _zeros(64),
        "unet.label_emb.weight": _zeros(64, 2),
        "unet.label_emb.bias": _zeros(64),
        "unet.out.weight": _zeros(1, 64, 1, 1),
        "unet.out.bias": _zeros(1),
    },
    "epoch": 100,
}


# Stub safetensors too (writes a fake binary blob)
class _SafetensorsStub:
    """Stand-in for the ``safetensors.torch`` submodule."""

    @staticmethod
    def save_file(state_dict, path, metadata=None):
        """Write a zero-filled file sized like the float32 payload.

        In reality safetensors writes a JSON header + binary tensor data;
        here only the file size is made realistic (4 bytes per element).
        ``metadata`` is accepted for signature compatibility with the real
        ``save_file`` and is ignored.
        """
        total_bytes = sum(t.numel() * 4 for t in state_dict.values())
        with open(path, "wb") as f:
            f.write(b"\x00" * total_bytes)


# ── §2  Set up the dummy project ───────────────────────────────────────────

DEMO_ROOT = Path("/tmp/ddpm_hf_demo")
PROJECT = DEMO_ROOT / "project"
EXPORT = DEMO_ROOT / "hf_export"

# Start from a clean slate so stale files from a previous run never show up
# in the final listing.
if DEMO_ROOT.exists():
    shutil.rmtree(DEMO_ROOT)
PROJECT.mkdir(parents=True)
(PROJECT / "checkpoints").mkdir()

# Minimal source files (will be copied into the HF package)
(PROJECT / "diffusion_conditional.py").write_text(
    '"""Stub: our DDPM forward/reverse process implementation."""\n'
    'import torch.nn as nn\n'
    'class GaussianDiffusion(nn.Module): ...\n'
    'class ConditionalDiffusionModel(nn.Module): ...\n'
)
(PROJECT / "unet_conditional.py").write_text(
    '"""Stub: our conditional U-Net architecture."""\n'
    'import torch.nn as nn\n'
    'class ConditionalUNet(nn.Module): ...\n'
)

# Fake checkpoint (file content doesn't matter — torch.load is stubbed)
(PROJECT / "checkpoints/best_model.pt").write_bytes(b"DUMMY_CKPT")

# Training config
(PROJECT / "args.json").write_text(json.dumps({
    "image_size": 256,
    "label_dim": 2,
    "base_channels": 64,
    "channel_multipliers": [1, 2, 4, 8],
    "attention_levels": [2, 3],
    "dropout": 0.1,
    "timesteps": 1500,
    "beta_start": 1e-4,
    "beta_end": 0.02,
    "schedule_type": "linear",
    "ddim_steps": 50,
    "epochs": 100,
    "batch_size": 8,
    "lr": 2e-4,
    "ema_decay": 0.9999,
    "seed": 42,
}, indent=2))

# Training labels (for label_mu / label_std extraction).
# A seeded generator keeps the demo output identical from run to run.
_rng = np.random.default_rng(42)
labels = _rng.uniform([0.1, 0.6], [0.5, 1.0], (50, 2)).astype(np.float32)
np.save(PROJECT / "train_labels_LH_2.npy", labels)


# ── §3  Inject stubs into sys.modules and import upload_to_hub ─────────────

sys.modules["torch"] = _TorchStub()

# Register the submodule AND bind it as an attribute of the package stub so
# that both `from safetensors.torch import save_file` and
# `import safetensors.torch; safetensors.torch.save_file(...)` resolve.
_safetensors_torch = _SafetensorsStub()
_safetensors_pkg = types.ModuleType("safetensors")
_safetensors_pkg.torch = _safetensors_torch
sys.modules["safetensors"] = _safetensors_pkg
sys.modules["safetensors.torch"] = _safetensors_torch


# Also stub huggingface_hub so we don't hit the network
class _HfStub:
    """Stand-in for ``huggingface_hub``; every call is a no-op."""

    class HfApi:
        """No-op replacement exposing the two methods stubbed for the demo."""

        def create_repo(*args, **kwargs):
            return None

        def upload_folder(*args, **kwargs):
            return None

    @staticmethod
    def login(*args, **kwargs):
        return None


sys.modules["huggingface_hub"] = _HfStub()

# upload_to_hub.py is expected to sit next to this script; the import must
# come after the stubs above are registered.
sys.path.insert(0, str(Path(__file__).parent))
import upload_to_hub


# ── §4  Run package_model() in dry-run mode ────────────────────────────────

class FakeArgs:
    """Attribute bag passed to ``upload_to_hub.package_model``."""

    checkpoint = str(PROJECT / "checkpoints/best_model.pt")
    training_args = str(PROJECT / "args.json")
    data_dir = str(PROJECT)
    export_dir = str(EXPORT)
    no_ema = False
    repo_id = "demo-user/camels-ddpm-omega-sigma8"


print("=" * 65)
print(" DDPM -> Hugging Face Hub Packager (DUMMY DEMO)")
print("=" * 65)

# NOTE(review): `folder` is used below as a pathlib.Path (supports `/` and
# `.iterdir()`); confirm against package_model()'s return type.
folder = upload_to_hub.package_model(FakeArgs())


# ── §5  Verify the result ──────────────────────────────────────────────────

def _format_size(num_bytes):
    """Format a byte count as ``<n>B``, ``<n.n>K`` or ``<n.n>M`` (decimal units)."""
    if num_bytes > 1e6:
        return f"{num_bytes / 1e6:.1f}M"
    if num_bytes > 1e3:
        return f"{num_bytes / 1e3:.1f}K"
    return f"{num_bytes}B"


print("\n" + "=" * 65)
print(" Package verification")
print("=" * 65)

config = json.loads((folder / "config.json").read_text())
print("\nconfig.json contents:")
print(json.dumps(config, indent=2))

readme_lines = (folder / "README.md").read_text().splitlines()
print("\nREADME.md preview (first 50 lines):")
print("-" * 65)
print("\n".join(readme_lines[:50]))
# Only show the ellipsis when the preview actually cut something off.
if len(readme_lines) > 50:
    print("...")
print("-" * 65)

print(f"\nFile listing of {folder}:")
files = sorted(folder.iterdir())
print(f"\n{'File':<32} {'Size':>10} Purpose")
print("-" * 75)

purposes = {
    "config.json": "Architecture hyperparameters (hub-readable)",
    "model.safetensors": "Model weights (EMA preferred)",
    "pytorch_model.bin": "Model weights (fallback if no safetensors)",
    "README.md": "Model card with YAML metadata + usage docs",
    "modeling_ddpm_camels.py": "Self-contained loader for `from_pretrained`",
    "diffusion_conditional.py": "Project file: forward/reverse DDPM process",
    "unet_conditional.py": "Project file: U-Net architecture",
    "inference_example.py": "Standalone demo script for users",
    "requirements.txt": "Pinned Python dependencies",
    ".gitattributes": "Git LFS configuration for large files",
}

for f in files:
    sz_str = _format_size(f.stat().st_size)
    # Files without a known purpose are still listed, with a blank column.
    purpose = purposes.get(f.name, "")
    print(f" {f.name:<30} {sz_str:>10} {purpose}")

print(f"\nDemo complete -> {folder}")
print("In a real run, the next step is:")
print(" python upload_to_hub.py --checkpoint best_model.pt \\")
print(" --training_args args.json \\")
print(" --repo_id YOUR_USERNAME/camels-ddpm \\")
print(" --private")