Unconditional Image Generation
Diffusers
Safetensors
RAEDiTPipeline
rae
rae-dit
diffusion-transformer
imagenet-256
arxiv:2510.11690
Instructions to use plugyawn/rae-dit-s-ep14-diffusers with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use plugyawn/rae-dit-s-ep14-diffusers with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("plugyawn/rae-dit-s-ep14-diffusers", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
```json
{
  "source": {
    "weights_repo_or_path": "nyu-visionx/RAE-collections",
    "config_repo_or_path": "/home/ubuntu/rae-upstream-configs",
    "config_path": "configs/stage2/training/ImageNet256/DiTDH-S_DINOv2-B.yaml",
    "vae_model_name_or_path": "nyu-visionx/RAE-dinov2-wReg-base-ViTXL-n08"
  },
  "scheduler": {
    "num_train_timesteps": 1000,
    "shift": 6.928203230275509,
    "path_type": "Linear",
    "prediction": "velocity",
    "time_dist_type": "logit-normal_0_1"
  },
  "sampler": {
    "mode": "ODE",
    "params": {
      "sampling_method": "euler",
      "num_steps": 50,
      "atol": 1e-06,
      "rtol": 0.001,
      "reverse": false
    }
  },
  "guidance": {
    "method": "cfg",
    "scale": 1.0,
    "t_min": 0.0,
    "t_max": 1.0
  },
  "misc": {
    "latent_size": [
      768,
      16,
      16
    ],
    "num_classes": 1000,
    "time_dist_shift_dim": 196608,
    "time_dist_shift_base": 4096
  },
  "transformer": {
    "checkpoint_path": "/home/ubuntu/.cache/huggingface/hub/models--nyu-visionx--RAE-collections/snapshots/1be4f03273523431f099a934da4cf1940dc6039f/DiTs/Dinov2/wReg_base/ImageNet256/DiTDH-S_ep14/stage2_model.pt",
    "checkpoint_key": null,
    "prefer_ema": true,
    "config": {
      "sample_size": 16,
      "patch_size": 1,
      "in_channels": 768,
      "hidden_size": [
        384,
        2048
      ],
      "depth": [
        12,
        2
      ],
      "num_heads": [
        6,
        16
      ],
      "mlp_ratio": 4.0,
      "class_dropout_prob": 0.1,
      "num_classes": 1000,
      "use_qknorm": false,
      "use_swiglu": true,
      "use_rope": true,
      "use_rmsnorm": true,
      "wo_shift": false,
      "use_pos_embed": true
    },
    "num_parameters": 196554068
  },
  "pipeline": {
    "saved": true,
    "id2label_json_path": null
  }
}
```