| logging: | |
| project: titok_video | |
| run_name: BaseAll-CB16k-TL128-256x33-init-BS64-clipGrad1.0 | |
| logging_interval: 50 | |
| save_path: out_tiny | |
| save_step_interval: 5000 | |
| keep_prior_checkpoints: -1 # set to -1 to keep all prior checkpoints | |
| resume_from_checkpoint: | |
| init_from_checkpoint: base-interp-256x33-TL128.ckpt | |
| model: | |
| titok: | |
| temporal_patch_size: 2 | |
| spatial_patch_size: 4 | |
| fsq_levels: [8, 8, 8, 6, 5] # [7, 5, 5, 5, 5] | |
| num_latent_tokens: 128 | |
| encoder_size: base | |
| decoder_size: base | |
| exp_residual: False | |
| vae: | |
| type: wfvae # cogvideox, vidtok, wfvae | |
| path: preprocess_dataset/wf-16 | |
| latent_channels: 16 | |
| temporal_compression: 4 | |
| spatial_compression: 8 | |
| disc: # experimental | |
| use_disc: False | |
| model_layers: 1 | |
| model_heads: 1 | |
| model_dim: 128 | |
| temporal_patch_size: 4 | |
| spatial_patch_size: 4 | |
| disc_start: 45000 | |
| disc_factor: 1.0 | |
| disc_weight: 0.1 | |
| lecam_weight: 0.0 # disabled | |
| base_gamma: 1 # a higher gamma applies more smoothing early in training | |
| final_gamma: 0.1 | |
| dataset: | |
| train_dataset: "/workspace/out_enc_256_33/**/*.pt" | |
| eval_dataset: "/workspace/out_enc_256_33_eval/*.pt" | |
| resolution: 256 | |
| num_frames: 33 | |
| frames_per_second: 8 | |
| workers: 8 | |
| optimizer: | |
| titok: | |
| learning_rate: 1e-4 | |
| beta1: 0.9 | |
| beta2: 0.99 | |
| weight_decay: 1e-4 | |
| warmup_steps: 5000 # 10000 | |
| end_lr: 1e-5 | |
| disc: # not used | |
| learning_rate: 1e-4 | |
| beta1: 0.9 | |
| beta2: 0.99 | |
| weight_decay: 1e-4 | |
| warmup_steps: 1000 | |
| end_lr: 1e-5 | |
| training: | |
| torch_compile: True | |
| seed: 42 | |
| max_grad_norm: 1.0 # not needed? | |
| batch_size: 64 | |
| # strategy: # ddp | |
| enable_tf32: True | |
| precision: bf16-mixed | |
| train_devices: 1 | |
| accelerator: 'gpu' | |
| max_steps: 500000 | |
| val_step_interval: 2000 | |
| eval_recon_log_num: 4 | |
| eval_sample_size: 32 | |
| eval_batch_size: 1 | |
| eval_clear_cache: True | |
| eval_shuffle: True | |
| log_codebook: True |