---
# Training Model Registry
# Defines base models available for LoRA training with their optimal parameters

training_models:
  # FLUX - Best for photorealistic images (recommended for realistic person)
  flux2_dev:
    name: "FLUX.2 Dev (Recommended)"
    description: "Latest FLUX model, 32B params, best quality for realistic person. Uses Mistral text encoder."
    hf_repo: "black-forest-labs/FLUX.2-dev"
    hf_filename: "flux2-dev.safetensors"
    model_type: "flux2"
    training_framework: "musubi-tuner"
    resolution: 1024
    # lr 1.0 is the documented convention for the Prodigy optimizer below,
    # which adapts the effective step size itself.
    learning_rate: 1.0
    network_rank: 64
    network_alpha: 32
    optimizer: "prodigy"
    lr_scheduler: "constant"
    timestep_sampling: "flux2_shift"
    network_module: "networks.lora_flux_2"
    # NOTE(review): 50 steps is 30-40x lower than the sibling entries
    # (1500-2000) — looks like a debug/smoke-test value; confirm before
    # using this entry for a real training run.
    max_train_steps: 50
    fp8_base: true
    gradient_checkpointing: true
    use_case: "images"
    vram_required_gb: 48
    recommended_gpu: "NVIDIA RTX A6000"
    recommended_images: "15-30 high quality photos with detailed captions"
    training_script: "flux_2_train_network.py"
    # Model paths on network volume:
    # DiT: /workspace/models/FLUX.2-dev/flux2-dev.safetensors
    # VAE: /workspace/models/FLUX.2-dev/vae/diffusion_pytorch_model.safetensors
    # Text encoder: /workspace/models/FLUX.2-dev/text_encoder/model-00001-of-00010.safetensors

  flux1_dev:
    name: "FLUX.1 Dev"
    description: "Previous gen FLUX, still excellent for realistic person LoRAs"
    hf_repo: "black-forest-labs/FLUX.1-dev"
    hf_filename: "flux1-dev.safetensors"
    model_type: "flux"
    resolution: 768
    # Keep the decimal point: YAML 1.1 loaders (e.g. PyYAML's default
    # resolver) only recognize scientific notation as a float when the
    # mantissa contains a ".", so a bare "4e-4" silently loads as a string.
    learning_rate: 4.0e-4
    text_encoder_lr: 4.0e-5
    network_rank: 32
    network_alpha: 16
    # NOTE(review): clip_skip is an SD-family option — confirm the FLUX
    # training script actually consumes it.
    clip_skip: 1
    optimizer: "AdamW8bit"
    lr_scheduler: "cosine"
    min_snr_gamma: 5
    max_train_steps: 1500
    use_case: "images"
    vram_required_gb: 24
    recommended_images: "15-30 high quality photos"
    training_script: "flux_train_network.py"

  # WAN 2.2 - Text-to-Video LoRA training (14B params, uses musubi-tuner)
  wan22_t2v:
    name: "WAN 2.2 T2V (14B)"
    description: "WAN 2.2 text-to-video model. Trains natural-looking video LoRAs. Requires A100 80GB."
    model_type: "wan22"
    training_framework: "musubi-tuner"
    training_script: "wan_train_network.py"
    network_module: "networks.lora_wan"
    resolution: 512
    # "2.0e-4" (not "2e-4"): YAML 1.1 loaders such as PyYAML require a "."
    # in the mantissa to resolve scientific notation as a float; without it
    # the value silently loads as the string "2e-4".
    learning_rate: 2.0e-4
    network_rank: 64
    network_alpha: 32
    optimizer: "adamw8bit"
    lr_scheduler: "constant"
    timestep_sampling: "shift"
    discrete_flow_shift: 5.0
    gradient_checkpointing: true
    max_train_steps: 2000
    save_every_n_steps: 500
    use_case: "images+video"
    vram_required_gb: 48
    recommended_gpu: "NVIDIA A100 80GB"
    recommended_images: "20-50 high quality photos with detailed captions"
    # Model paths on network volume:
    # DiT low-noise: /workspace/models/WAN2.2/wan2.2_t2v_low_noise_14B_fp16.safetensors
    # DiT high-noise: /workspace/models/WAN2.2/wan2.2_t2v_high_noise_14B_fp16.safetensors
    # VAE: /workspace/models/WAN2.2/Wan2.1_VAE.pth
    # T5: /workspace/models/WAN2.2/models_t5_umt5-xxl-enc-bf16.pth

  # SD 1.5 Realistic Vision - Good balance of quality and speed
  sd15_realistic:
    name: "Realistic Vision V5.1"
    description: "SD 1.5 based, great for realistic humans, faster training"
    hf_repo: "SG161222/Realistic_Vision_V5.1_noVAE"
    hf_filename: "Realistic_Vision_V5.1_fp16-no-ema.safetensors"
    model_type: "sd15"
    resolution: 512
    # "1.0e-4" (not "1e-4"): under YAML 1.1 loaders (PyYAML default) the
    # dot-less form is resolved as a string, not a float.
    learning_rate: 1.0e-4
    network_rank: 32
    network_alpha: 16
    clip_skip: 1
    optimizer: "AdamW8bit"
    use_case: "images"
    vram_required_gb: 8
    recommended_images: "15-30 photos"

  # SDXL - Higher quality than SD 1.5, but more VRAM
  sdxl_base:
    name: "SDXL Base 1.0"
    description: "Higher resolution and quality than SD 1.5"
    hf_repo: "stabilityai/stable-diffusion-xl-base-1.0"
    hf_filename: "sd_xl_base_1.0.safetensors"
    model_type: "sdxl"
    resolution: 1024
    # "1.0e-4" (not "1e-4"): under YAML 1.1 loaders (PyYAML default) the
    # dot-less form is resolved as a string, not a float.
    learning_rate: 1.0e-4
    network_rank: 32
    network_alpha: 16
    clip_skip: 2
    optimizer: "AdamW8bit"
    use_case: "images"
    vram_required_gb: 12
    recommended_images: "20-40 photos"

# Video generation models (for img2video, not training)
video_models:
  wan22_i2v:
    name: "WAN 2.2 Image-to-Video"
    description: "Converts images to videos, use with your trained LoRA images"
    hf_repo: "Wan-AI/Wan2.2-I2V-A14B"
    model_type: "wan22"
    use_case: "img2video"
    vram_required_gb: 24
    # Intentionally a descriptive string here (training entries use an int
    # pixel size) — consumers must not assume a numeric type for this key.
    resolution: "480p/720p"

# Default model for training
# Must match one of the keys under training_models above.
default_training_model: "flux2_dev"