| seed: 999 | |
| cudnn_deterministic: true | |
| model: diffusion_transformer_1D | |
| dropout_p: 0.1 | |
| token_dropout_p: 0.1 | |
| num_output_layer: 2 | |
| drop_path_rate: 0.0 | |
| statistical_prior_path: null | |
| train_data_path: large_audio.scp | |
| val_data_path: val_with_duration.scp | |
| batch_size: 16 | |
| max_length: 8000 | |
| min_length: 100 | |
| n_worker: 4 | |
| minibatch_debug: -1 | |
| segment_duration: 30 | |
| n_epoch: 5 | |
| grad_accum: 1 | |
| fine_decoder: false | |
| learning_rate: 0.0001 | |
| grad_clip: 2.0 | |
| warmup_steps: 1000 | |
| data_parallel: fsdp | |
| mixed_precision: fp32 | |
| grad_precision: fp32 | |
| activation_checkpointing: true | |
| weight_decay: 0.05 | |
| n_layer: 16 | |
| n_head: 12 | |
| n_embd: 768 | |
| dropout: 0.0 | |
| bias: false | |
| block_size: 8192 | |
| prefix_lm: false | |
| num_codebooks: 1 | |
| num_channels: 32 | |
| unet_model_name: transformer-2d | |
| transformer_diffusion_config: model_config.json | |
| sq_config: sq_config.yaml | |
| sq_resume: ckpt_00615000.pth | |
| whisper_path: openai/whisper-medium | |
| reason_lm_path: audiothinking.pth | |
| reconstruction_path: ep5.checkpoint | |
| llm_path: meta-llama/Llama-3.2-3B | |
| prompt_path: prompts/train_prompt.json | |
| best_rq_ckpt: music_ssl.pt | |
| exp_dir: ./ | |
| print_freq: 100 | |
| save_interval: 10000 | |
| resume: null | |
| rank: 0 | |