---
# Training Model Registry
# Defines base models available for LoRA training with their optimal parameters
training_models:
  # FLUX - Best for photorealistic images (recommended for realistic person)
  flux2_dev:
    name: "FLUX.2 Dev (Recommended)"
    description: "Latest FLUX model, 32B params, best quality for realistic person. Uses Mistral text encoder."
    hf_repo: "black-forest-labs/FLUX.2-dev"
    hf_filename: "flux2-dev.safetensors"
    model_type: "flux2"
    training_framework: "musubi-tuner"
    resolution: 1024
    # Prodigy is self-tuning; lr = 1.0 is its conventional starting value.
    learning_rate: 1.0
    network_rank: 64
    network_alpha: 32
    optimizer: "prodigy"
    lr_scheduler: "constant"
    timestep_sampling: "flux2_shift"
    network_module: "networks.lora_flux_2"
    # NOTE(review): 50 steps is far below the sibling models (1500/2000) —
    # confirm this is not a leftover smoke-test value.
    max_train_steps: 50
    fp8_base: true
    gradient_checkpointing: true
    use_case: "images"
    vram_required_gb: 48
    recommended_gpu: "NVIDIA RTX A6000"
    recommended_images: "15-30 high quality photos with detailed captions"
    training_script: "flux_2_train_network.py"
    # Model paths on network volume:
    # DiT: /workspace/models/FLUX.2-dev/flux2-dev.safetensors
    # VAE: /workspace/models/FLUX.2-dev/vae/diffusion_pytorch_model.safetensors
    # Text encoder: /workspace/models/FLUX.2-dev/text_encoder/model-00001-of-00010.safetensors
flux1_dev:
name: "FLUX.1 Dev"
description: "Previous gen FLUX, still excellent for realistic person LoRAs"
hf_repo: "black-forest-labs/FLUX.1-dev"
hf_filename: "flux1-dev.safetensors"
model_type: "flux"
resolution: 768
learning_rate: 4e-4
text_encoder_lr: 4e-5
network_rank: 32
network_alpha: 16
clip_skip: 1
optimizer: "AdamW8bit"
lr_scheduler: "cosine"
min_snr_gamma: 5
max_train_steps: 1500
use_case: "images"
vram_required_gb: 24
recommended_images: "15-30 high quality photos"
training_script: "flux_train_network.py"
# WAN 2.2 - Text-to-Video LoRA training (14B params, uses musubi-tuner)
wan22_t2v:
name: "WAN 2.2 T2V (14B)"
description: "WAN 2.2 text-to-video model. Trains natural-looking video LoRAs. Requires A100 80GB."
model_type: "wan22"
training_framework: "musubi-tuner"
training_script: "wan_train_network.py"
network_module: "networks.lora_wan"
resolution: 512
learning_rate: 2e-4
network_rank: 64
network_alpha: 32
optimizer: "adamw8bit"
lr_scheduler: "constant"
timestep_sampling: "shift"
discrete_flow_shift: 5.0
gradient_checkpointing: true
max_train_steps: 2000
save_every_n_steps: 500
use_case: "images+video"
vram_required_gb: 48
recommended_gpu: "NVIDIA A100 80GB"
recommended_images: "20-50 high quality photos with detailed captions"
# Model paths on network volume:
# DiT low-noise: /workspace/models/WAN2.2/wan2.2_t2v_low_noise_14B_fp16.safetensors
# DiT high-noise: /workspace/models/WAN2.2/wan2.2_t2v_high_noise_14B_fp16.safetensors
# VAE: /workspace/models/WAN2.2/Wan2.1_VAE.pth
# T5: /workspace/models/WAN2.2/models_t5_umt5-xxl-enc-bf16.pth
# SD 1.5 Realistic Vision - Good balance of quality and speed
sd15_realistic:
name: "Realistic Vision V5.1"
description: "SD 1.5 based, great for realistic humans, faster training"
hf_repo: "SG161222/Realistic_Vision_V5.1_noVAE"
hf_filename: "Realistic_Vision_V5.1_fp16-no-ema.safetensors"
model_type: "sd15"
resolution: 512
learning_rate: 1e-4
network_rank: 32
network_alpha: 16
clip_skip: 1
optimizer: "AdamW8bit"
use_case: "images"
vram_required_gb: 8
recommended_images: "15-30 photos"
# SDXL - Higher quality than SD 1.5, but more VRAM
sdxl_base:
name: "SDXL Base 1.0"
description: "Higher resolution and quality than SD 1.5"
hf_repo: "stabilityai/stable-diffusion-xl-base-1.0"
hf_filename: "sd_xl_base_1.0.safetensors"
model_type: "sdxl"
resolution: 1024
learning_rate: 1e-4
network_rank: 32
network_alpha: 16
clip_skip: 2
optimizer: "AdamW8bit"
use_case: "images"
vram_required_gb: 12
recommended_images: "20-40 photos"
# Video generation models (for img2video, not training)
video_models:
  wan22_i2v:
    name: "WAN 2.2 Image-to-Video"
    description: "Converts images to videos, use with your trained LoRA images"
    hf_repo: "Wan-AI/Wan2.2-I2V-A14B"
    model_type: "wan22"
    use_case: "img2video"
    vram_required_gb: 24
    # Quoted string on purpose — this is a label ("480p/720p"), not a number
    # like the integer `resolution` keys under training_models.
    resolution: "480p/720p"
# Default model for training — must match a key under `training_models`.
default_training_model: "flux2_dev"