{
  "stage": 2,
  "method": "KL distillation",
  "best_loss": 2.7305,
  "total_steps": 20000,
  "temperature": 2.0,
  "alpha": 0.7,
  "lr": 0.0005,
  "status": "COMPLETE",
  "teacher": "CohereLabs/tiny-aya-global",
  "student_params_m": 721.6,
  "languages": 67
}
{
  "stage": 2,
  "method": "KL distillation",
  "best_loss": 2.7305,
  "total_steps": 20000,
  "temperature": 2.0,
  "alpha": 0.7,
  "lr": 0.0005,
  "status": "COMPLETE",
  "teacher": "CohereLabs/tiny-aya-global",
  "student_params_m": 721.6,
  "languages": 67
}