| { |
| "step": 2000, |
| "config": { |
| "data": { |
| "train_split": "/content/fleurs-tr-hi-mimi-encoded/splits/train.jsonl", |
| "val_split": "/content/fleurs-tr-hi-mimi-encoded/splits/val.jsonl", |
| "encoded_dir": "/content/fleurs-tr-hi-mimi-encoded/encoded/encoded", |
| "max_frames": 300, |
| "audio_frame_rate": 12.5, |
| "num_workers": 4, |
| "pin_memory": true |
| }, |
| "train": { |
| "num_codebooks": 8, |
| "batch_size": 2, |
| "grad_accum": 4, |
| "max_steps": 5000, |
| "warmup_steps": 500, |
| "min_lr_ratio": 0.1, |
| "depth_chunk_size": 16, |
| "precision": "bfloat16", |
| "max_grad_norm": 1.0, |
| "weight_decay": 0.01, |
| "adam_beta1": 0.9, |
| "adam_beta2": 0.999, |
| "adam_eps": 1e-08 |
| }, |
| "loss": { |
| "text_weight": 0.1, |
| "audio_weight": 1.0 |
| }, |
| "optim": { |
| "lr_lora": 0.00015, |
| "lr_full_ft": 5e-05, |
| "lr_projection": 0.0005, |
| "lr_depth": 0.00025, |
| "lr_audio_embed": 0.0005, |
| "lr_text_embed": 0.0005 |
| }, |
| "logging": { |
| "log_every": 20, |
| "save_every": 1000, |
| "audio_every": 1000, |
| "val_every": 1000, |
| "save_dir": "checkpoints/stage2_scale", |
| "wandb_project": "tinyaya-s2s", |
| "wandb_run_name": "stage2_scale_9k", |
| "use_wandb": true, |
| "push_to_hub": true, |
| "hub_repo_id": "tiny-aya-translate/tinyaya-stage2-scale" |
| } |
| } |
| } |