| seed: 999 |
| cudnn_deterministic: false |
| train_data_jsons: |
| - data/train/1splits/data.0.tiny.json |
| valid_data_jsons: |
| - data/val/1splits/data.0.tiny.json |
| batch_scale: 2000 |
| max_length: 1500 |
| min_length: 1 |
| n_worker: 4 |
| local_rank: -1 |
| minibatch_debug: -1 |
| n_epoch: 1 |
| grad_accum: 64 |
| global_learning_rate: 2.0e-06 |
| local_learning_rate: 4.0e-06 |
| grad_clip: 2.0 |
| warmup_steps: 10000 |
| total_steps: 55 |
| dim: 4096 |
| text_card: 1 |
| existing_text_padding_id: 3 |
| n_q: 16 |
| dep_q: 8 |
| card: 2048 |
| num_heads: 32 |
| num_layers: 32 |
| hidden_scale: 4.5 |
| causal: true |
| context: 3000 |
| exp_dir: exp_data/Moshi/v3_full_emo_v0 |
| print_freq: 5 |
| save_interval: 10000 |
| resume: null |
| merged_model_path: exp_data/Moshi/v3_full_emo_v0/model_merged.safetensors |
|
|