adapter_path: .
batch_size: 1
data: data
grad_accumulation_steps: 64
grad_checkpoint: true
iters: 215030
learning_rate: 2.0e-05
lora_parameters:
  dropout: 0.1
  rank: 32
  scale: 16
max_grad_norm: 0.05
max_seq_length: 1024
model: sarvamai/sarvam-1
save_every: 100
train: true