# ReasoningCodec / config.yaml
# Dongchao's picture
# Upload v0.1
# e6bf8fc verified
# Flat key/value run configuration.
#
# String values are single-quoted so their type is explicit; numbers,
# booleans and null are left as plain scalars. Key order is unchanged.
#
# NOTE(review): the stanza headings below are a reviewer's grouping based on
# key names only. Confirm the meaning of each key against the code that
# consumes this file.

# --- Seeding / determinism (presumably) ---
seed: 999
cudnn_deterministic: true

# --- Model selection and regularisation (presumably) ---
model: 'diffusion_transformer_1D'
dropout_p: 0.1
token_dropout_p: 0.1
num_output_layer: 2
drop_path_rate: 0.0
statistical_prior_path: null

# --- Data (presumably) ---
train_data_path: 'large_audio.scp'
val_data_path: 'val_with_duration.scp'
batch_size: 16
# NOTE(review): units of max_length / min_length / segment_duration are not
# stated in this file; confirm with the data loader.
max_length: 8000
min_length: 100
n_worker: 4
minibatch_debug: -1
segment_duration: 30

# --- Training schedule and optimisation (presumably) ---
n_epoch: 5
grad_accum: 1
fine_decoder: false
learning_rate: 0.0001
grad_clip: 2.0
warmup_steps: 1000
data_parallel: 'fsdp'
mixed_precision: 'fp32'
grad_precision: 'fp32'
activation_checkpointing: true
weight_decay: 0.05

# --- Transformer dimensions (presumably) ---
n_layer: 16
n_head: 12
n_embd: 768
# `dropout` is a separate key from `dropout_p` above and holds a different
# value (0.0 vs 0.1). NOTE(review): confirm which component reads which.
dropout: 0.0
bias: false
block_size: 8192
prefix_lm: false
num_codebooks: 1
num_channels: 32

# --- Sub-model configs and checkpoint references (presumably) ---
# NOTE(review): `model` above says "1D" while this name says "2d"; confirm
# that this is intentional.
unet_model_name: 'transformer-2d'
transformer_diffusion_config: 'model_config.json'
sq_config: 'sq_config.yaml'
sq_resume: 'ckpt_00615000.pth'
whisper_path: 'openai/whisper-medium'
reason_lm_path: 'audiothinking.pth'
reconstruction_path: 'ep5.checkpoint'
llm_path: 'meta-llama/Llama-3.2-3B'
prompt_path: 'prompts/train_prompt.json'
best_rq_ckpt: 'music_ssl.pt'

# --- Experiment output, logging and resume (presumably) ---
exp_dir: './'
print_freq: 100
save_interval: 10000
resume: null
rank: 0