| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9215358931552586, |
| "global_step": 14000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.652198107957708e-05, |
| "loss": 3.4104, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_loss": 2.8557941913604736, |
| "eval_runtime": 912.4041, |
| "eval_samples_per_second": 168.064, |
| "eval_steps_per_second": 10.504, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.304396215915415e-05, |
| "loss": 2.8175, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_loss": 2.6051383018493652, |
| "eval_runtime": 902.9796, |
| "eval_samples_per_second": 169.818, |
| "eval_steps_per_second": 10.614, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.956594323873122e-05, |
| "loss": 2.6285, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_loss": 2.4875028133392334, |
| "eval_runtime": 904.858, |
| "eval_samples_per_second": 169.465, |
| "eval_steps_per_second": 10.592, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 3.608792431830829e-05, |
| "loss": 2.5091, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_loss": 2.428805351257324, |
| "eval_runtime": 903.5608, |
| "eval_samples_per_second": 169.709, |
| "eval_steps_per_second": 10.607, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.2609905397885366e-05, |
| "loss": 3.4219, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_loss": 5.509440898895264, |
| "eval_runtime": 902.6374, |
| "eval_samples_per_second": 169.882, |
| "eval_steps_per_second": 10.618, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.913188647746244e-05, |
| "loss": 3.0413, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_loss": 2.3123719692230225, |
| "eval_runtime": 903.1677, |
| "eval_samples_per_second": 169.782, |
| "eval_steps_per_second": 10.612, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.5653867557039513e-05, |
| "loss": 2.3193, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_loss": 2.2579617500305176, |
| "eval_runtime": 903.9178, |
| "eval_samples_per_second": 169.642, |
| "eval_steps_per_second": 10.603, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.2175848636616583e-05, |
| "loss": 2.2688, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_loss": 2.231369972229004, |
| "eval_runtime": 903.0629, |
| "eval_samples_per_second": 169.802, |
| "eval_steps_per_second": 10.613, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.8697829716193657e-05, |
| "loss": 2.2357, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.88, |
| "eval_loss": 2.1945502758026123, |
| "eval_runtime": 903.9088, |
| "eval_samples_per_second": 169.643, |
| "eval_steps_per_second": 10.603, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.521981079577073e-05, |
| "loss": 2.2002, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_loss": 2.1717965602874756, |
| "eval_runtime": 903.6883, |
| "eval_samples_per_second": 169.685, |
| "eval_steps_per_second": 10.605, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.1741791875347802e-05, |
| "loss": 2.1509, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_loss": 2.1417629718780518, |
| "eval_runtime": 903.5674, |
| "eval_samples_per_second": 169.707, |
| "eval_steps_per_second": 10.607, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.263772954924876e-06, |
| "loss": 2.1288, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_loss": 2.1314194202423096, |
| "eval_runtime": 902.6397, |
| "eval_samples_per_second": 169.882, |
| "eval_steps_per_second": 10.618, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.785754034501947e-06, |
| "loss": 2.1154, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.71, |
| "eval_loss": 2.120832920074463, |
| "eval_runtime": 902.7038, |
| "eval_samples_per_second": 169.87, |
| "eval_steps_per_second": 10.617, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.3077351140790207e-06, |
| "loss": 2.1057, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_loss": 2.1087450981140137, |
| "eval_runtime": 902.6299, |
| "eval_samples_per_second": 169.884, |
| "eval_steps_per_second": 10.618, |
| "step": 14000 |
| } |
| ], |
| "max_steps": 14376, |
| "num_train_epochs": 3, |
| "total_flos": 2.7036839150594458e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|