Instructions to use wfen/Cosmos3-Nano-FP8 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use wfen/Cosmos3-Nano-FP8 with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("wfen/Cosmos3-Nano-FP8", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
| """Load this quantized Cosmos3-Nano (FP8, safetensors). Self-contained — no project `src/` needed. | |
| Requires: diffusers (git main / >=0.39), nvidia-modelopt, torch (cu128), safetensors. | |
| from load_quantized import load | |
| pipe = load() # defaults to "." (the current working directory); or pass a repo id / local dir | |
| import torch | |
| with torch.autocast("cuda", torch.bfloat16): | |
| img = pipe("a corgi astronaut", num_frames=1, height=480, width=480).video[0][0] | |
| Format (Path B; see ../docs/reports/session_3.md): the FP8 transformer is stored as **safetensors** | |
| (`transformer/diffusion_pytorch_model.safetensors`: 505 weight-only E4M3 weights + per-tensor | |
| `weight_quantizer._amax`/`._scale` buffers) plus a tiny tensor-free `transformer/modelopt_state.pt` | |
| structural sidecar (the quantizer layout). The original `transformer/modelopt_quantized.pt` is | |
| **retained** as a fallback (loadable via `modelopt.torch.opt.restore`); this loader does NOT use it. | |
| SECURITY: `modelopt_state.pt` is loaded with `torch.load(weights_only=False)`, which executes | |
| arbitrary pickle. Load this checkpoint ONLY from a source you trust (a tampered sidecar = remote | |
| code execution). The safetensors weights themselves are safe; only the structural sidecar is pickle. | |
| """ | |
| import glob | |
| import os | |
| import torch | |
| from diffusers import Cosmos3OmniPipeline, Cosmos3OmniTransformer, UniPCMultistepScheduler | |
| import modelopt.torch.opt as mto | |
| from safetensors.torch import load_file | |
def load_transformer(local):
    """Materialize the quantized transformer from safetensors + the structural sidecar (no `.pt`).

    Args:
        local: Path of the checkpoint root. It must contain a ``transformer/``
            subdirectory holding ``config.json``, ``modelopt_state.pt`` and one
            or more ``*.safetensors`` shards.

    Returns:
        The transformer with the modelopt state restored and every shard's
        tensors loaded via ``load_state_dict(strict=True)``.

    Raises:
        FileNotFoundError: If ``transformer/`` contains no ``*.safetensors`` shard.
        RuntimeError: Propagated from ``load_state_dict(strict=True)`` when the
            shard keys do not match the restored module exactly.
    """
    tf_dir = f"{local}/transformer"

    # Instantiate from the stored config with `action_gen` forced off, then cast
    # the freshly built module to bf16 before the quantizer layout is applied.
    cfg = {**Cosmos3OmniTransformer.load_config(f"{tf_dir}/config.json"), "action_gen": False}
    tf = Cosmos3OmniTransformer.from_config(cfg).to(torch.bfloat16)

    # SECURITY: weights_only=False unpickles arbitrary objects, so a tampered
    # sidecar means code execution. Only load checkpoints from a trusted source.
    state = torch.load(f"{tf_dir}/modelopt_state.pt", weights_only=False)
    restored = mto.restore_from_modelopt_state(tf, state)
    if restored is not None:
        # Prefer the module handed back by the restore call when there is one.
        tf = restored

    # Escape the directory so glob metacharacters in the checkpoint path
    # (`[`, `]`, `*`, `?`) are matched literally instead of as a pattern.
    shards = sorted(glob.glob(f"{glob.escape(tf_dir)}/*.safetensors"))
    if not shards:
        # Fail early with the real cause rather than letting strict loading
        # report every parameter of the model as a missing key.
        raise FileNotFoundError(f"no *.safetensors shards found in {tf_dir!r}")

    # Merge all shards (in sorted filename order) into a single state dict.
    tensors = {}
    for shard in shards:
        tensors.update(load_file(shard))
    tf.load_state_dict(tensors, strict=True)
    return tf
def load(repo_or_dir=".", device="cuda"):
    """Assemble the Cosmos3 Omni pipeline around the quantized transformer.

    Args:
        repo_or_dir: An existing local directory is used as-is; any other value
            is passed to ``huggingface_hub.snapshot_download`` as a repo id.
        device: Target passed to ``pipe.to(...)`` once the pipeline is built.

    Returns:
        The pipeline, with its scheduler replaced by a ``UniPCMultistepScheduler``
        (``flow_shift=10.0``), after being moved to ``device``.
    """
    if not os.path.isdir(repo_or_dir):
        # Not a directory on disk: resolve it through the Hub instead.
        from huggingface_hub import snapshot_download

        checkpoint_dir = snapshot_download(repo_or_dir)
    else:
        checkpoint_dir = repo_or_dir

    # The transformer is built separately and injected, so from_pretrained
    # receives the already-quantized module.
    transformer = load_transformer(checkpoint_dir)
    pipeline = Cosmos3OmniPipeline.from_pretrained(
        checkpoint_dir,
        transformer=transformer,
        torch_dtype=torch.bfloat16,
        enable_safety_checker=False,
    )

    # Rebuild the scheduler from the shipped scheduler's config, overriding flow_shift.
    scheduler_config = pipeline.scheduler.config
    pipeline.scheduler = UniPCMultistepScheduler.from_config(scheduler_config, flow_shift=10.0)
    return pipeline.to(device)
if __name__ == "__main__":
    # Smoke test: load with the defaults, render one 480x480 frame, save it as a PNG.
    prompt = "A red panda astronaut floating in a nebula, highly detailed"
    pipe = load()

    # required: float32 rotary tensors -> bf16 linears
    with torch.autocast("cuda", dtype=torch.bfloat16):
        output = pipe(prompt, num_frames=1, height=480, width=480)
        img = output.video[0][0]  # first frame of the first generated video

    img.save("out.png")
    print("saved out.png")