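# NOTE(review): the next three lines look like repository-page residue rather
# than configuration; kept as comments so the document parses as YAML.
# BryanW's picture
# Add files using upload-large-folder tool
# b6ff324 verified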
# Run identity, output location and logging/checkpoint bookkeeping.
# NOTE(review): this file had lost its indentation; the keys below are nested
# under `experiment` on the assumption that they belong to it — confirm.
experiment:
  name: distill_dimo
  output_dir: ./experiments/distill_dimo
  # presumably counted in training iterations — TODO confirm
  log_every: 50
  save_every: 1000
  resume_iter: 0
# Global training-loop settings.
# `max_train_steps` must live under `training`: the scheduler section
# interpolates it as `${training.max_train_steps}`.
# NOTE(review): nesting of the remaining keys is reconstructed — confirm.
training:
  seed: 42
  mixed_precision: bf16
  max_train_steps: 10000
  gradient_accumulation_steps: 1
# Distillation settings.
# NOTE(review): this file had lost its indentation; every key from
# `teacher_ckpt` through `grad_clip` is nested here on the assumption that it
# belongs to `distill` — confirm against the code that reads this config.
distill:
  # Absolute paths on a private filesystem; they need overriding on any other
  # machine.
  teacher_ckpt: /gfs/space/private/fengzl/World_Model/URSA-1.7B
  prompt_source: /gfs/space/private/fengzl/World_Model/Koala-36M-v1

  # presumably the size of the generated clip and the prompt token budget —
  # TODO confirm
  num_frames: 17
  height: 320
  width: 512
  max_prompt_length: 320
  batch_size_per_gpu: 1

  # presumably loss weights and temperatures — TODO confirm
  lambda_kd: 0.5
  lambda_pg: 1.0
  lambda_ent: 0.01
  tau: 1.0
  tau_kd: 1.0

  # Teacher classifier-free-guidance options (names only; semantics are
  # defined by the consumer of this config).
  enable_teacher_cfg: true
  teacher_cfg_scale: 7.0
  teacher_cfg_prob: 1.0
  teacher_cfg_warmup_steps: 2000
  teacher_cfg_trunc: 0.9
  lambda_kd_uncond: 0.3
  reward_use_guided: false

  fake_rounds: 1
  use_surrogate_grad: false
  lambda_surr: 1.0
  t_curriculum_steps: 10000
  p_init_mix_ratio: 0.2
  p_mix_corrupt_frac: 0.2
  collapse_warn_frac: 0.2
  aux_noise_std: 1.0e-05
  # NOTE(review): could equally belong to `training`; placed here because it
  # directly follows the other distill keys — confirm.
  grad_clip: 1.0
# Optimizer for the student model: `target` names the class, `params` holds
# its arguments.
# `lr` must live at `optimizer_student.params.lr`: the scheduler section
# interpolates that exact path.
# NOTE(review): `betas` and `weight_decay` are placed under `params` as
# AdamW constructor arguments — confirm.
optimizer_student:
  target: torch.optim.AdamW
  params:
    lr: 1.0e-05
    betas:
      - 0.9
      - 0.95
    weight_decay: 0.01
# Optimizer for the auxiliary model: `target` names the class, `params` holds
# its arguments.
# NOTE(review): this file had lost its indentation; `lr`, `betas` and
# `weight_decay` are nested under `params` as AdamW constructor arguments —
# confirm.
optimizer_aux:
  target: torch.optim.AdamW
  params:
    lr: 1.0e-05
    betas:
      - 0.9
      - 0.95
    weight_decay: 0.01
# Learning-rate schedule: `target` names the class, `params` holds its
# arguments.
# `lr_max` and `max_steps` are interpolated from other sections, so they only
# resolve when `optimizer_student.params.lr` and `training.max_train_steps`
# exist as nested keys.
# NOTE(review): `lr_min` and `warmup_steps` are placed under `params` on the
# assumption that they are scheduler arguments — confirm.
lr_scheduler:
  target: diffnext.engine.lr_scheduler.CosineLR
  params:
    lr_max: ${optimizer_student.params.lr}
    lr_min: 1.0e-06
    max_steps: ${training.max_train_steps}
    warmup_steps: 500
# Loader settings for the prompt data.
# NOTE(review): this file had lost its indentation; the keys below are nested
# under `prompt_dataloader` on the assumption that they belong to it — confirm.
prompt_dataloader:
  shuffle_files: true
  shuffle_buffer: 50000
  num_workers: 4
  # presumably the record field that holds the prompt text — TODO confirm
  caption_field: caption
# Path to a config file.
# NOTE(review): presumably recorded by the launcher when this resolved config
# was dumped; kept as a top-level key — confirm it is not part of
# `prompt_dataloader`.
config: ./configs/distill_dimo.yaml