# Stage 2: Structure DiT (Rectified Flow)
#
# Training configuration for the stage-2 model: architecture, conditioning
# encoders, optimizer/scheduler, training schedule, data loading, and loss
# weights.
#
# NOTE(review): this file was previously collapsed onto a single line that
# began with `#`, so a YAML parser saw the entire config as one comment and
# loaded an empty document. The nesting below was reconstructed from key
# order; confirm against the config loader that `conditioning` belongs under
# `model`, that `optimizer`/`scheduler` are top-level, and that `curriculum`
# belongs under `training`.
---
model:
  name: "SLAT-Interior-DiT"
  width: 1536
  depth: 30
  num_heads: 12
  # NOTE(review): 8192 is far larger than `width`; confirm whether the
  # consumer treats this as a multiplier or as an absolute hidden size.
  mlp_ratio: 8192

  # Encoders and embedding sizes for the conditioning inputs.
  conditioning:
    image_encoder: "dinov3_large"
    depth_encoder: "custom_cnn"
    depth_dim: 256
    layout_encoder: "transformer"
    layout_dim: 512
    semantic_dim: 256

optimizer:
  type: AdamW
  lr: 1.0e-4
  weight_decay: 0.01

scheduler:
  type: linear_warmup_cosine
  warmup_steps: 10000

training:
  batch_size: 8  # per GPU
  num_gpus: 32
  # 8 (batch_size) x 32 (num_gpus) x 1 (gradient_accumulation) = 256.
  # Keep this in sync when changing any of those three values.
  effective_batch_size: 256
  gradient_accumulation: 1
  max_steps: 400000
  mixed_precision: bf16
  save_every: 10000
  log_every: 100

  # Per-resolution stages. The stage `steps` sum to 400000, matching
  # `max_steps` above.
  # NOTE(review): each stage carries its own `lr`; presumably it overrides
  # `optimizer.lr` for that stage -- confirm against the trainer.
  curriculum:
    - resolution: 256
      steps: 100000
      lr: 1.0e-4
    - resolution: 512
      steps: 200000
      lr: 1.0e-4
    - resolution: 1024
      steps: 100000
      lr: 2.0e-5

data:
  dataset: "InteriorFusion-Train"
  num_workers: 8
  pin_memory: true

# Noise / timestep sampling parameters for the flow-matching objective.
# NOTE(review): exact semantics of sigma_* and p_* depend on the sampler
# implementation, which is not visible from this file.
flow_matching:
  sigma_min: 0.001
  sigma_max: 80.0
  p_mean: -1.2
  p_std: 1.2

# Relative weights of the individual loss terms.
loss:
  flow_matching:
    weight: 1.0
  depth_guidance:
    weight: 0.3