{
  "base_model_name": "NousResearch/Meta-Llama-3-8B-Instruct",
  "base_model_class": "LlamaForCausalLM",
  "base_loaded_in_4bit": true,
  "base_loaded_in_8bit": false,
  "projections": "gate, down, up, q, v, k, o",
  "loss": 2.1147,
  "grad_norm": 0.5170264840126038,
  "learning_rate": 1.6055383568469583e-09,
  "epoch": 0.9990897505916622,
  "current_steps": 5487,
  "train_runtime": 39825.0119,
  "train_samples_per_second": 0.138,
  "train_steps_per_second": 0.017,
  "total_flos": 5.117631701849211e+17,
  "train_loss": 2.262945574157092
}