{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 188,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05319148936170213,
      "grad_norm": 8.8125,
      "learning_rate": 5.768752863818909e-06,
      "loss": 2.3669,
      "step": 5
    },
    {
      "epoch": 0.10638297872340426,
      "grad_norm": 6.28125,
      "learning_rate": 1.2979693943592545e-05,
      "loss": 2.2416,
      "step": 10
    },
    {
      "epoch": 0.1595744680851064,
      "grad_norm": 5.875,
      "learning_rate": 2.0190635023366185e-05,
      "loss": 2.0707,
      "step": 15
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 5.3125,
      "learning_rate": 2.7401576103139817e-05,
      "loss": 1.9548,
      "step": 20
    },
    {
      "epoch": 0.26595744680851063,
      "grad_norm": 5.34375,
      "learning_rate": 3.461251718291346e-05,
      "loss": 1.8602,
      "step": 25
    },
    {
      "epoch": 0.3191489361702128,
      "grad_norm": 5.15625,
      "learning_rate": 4.18234582626871e-05,
      "loss": 1.8271,
      "step": 30
    },
    {
      "epoch": 0.3723404255319149,
      "grad_norm": 5.0,
      "learning_rate": 4.903439934246073e-05,
      "loss": 1.7719,
      "step": 35
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 5.6875,
      "learning_rate": 5.045209560625647e-05,
      "loss": 1.7481,
      "step": 40
    },
    {
      "epoch": 0.4787234042553192,
      "grad_norm": 5.03125,
      "learning_rate": 5.0352705666383524e-05,
      "loss": 1.7525,
      "step": 45
    },
    {
      "epoch": 0.5319148936170213,
      "grad_norm": 4.96875,
      "learning_rate": 5.0177288511784766e-05,
      "loss": 1.7132,
      "step": 50
    },
    {
      "epoch": 0.5851063829787234,
      "grad_norm": 4.6875,
      "learning_rate": 4.9926553347047093e-05,
      "loss": 1.6692,
      "step": 55
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 5.15625,
      "learning_rate": 4.9601513884480096e-05,
      "loss": 1.6376,
      "step": 60
    },
    {
      "epoch": 0.6914893617021277,
      "grad_norm": 4.34375,
      "learning_rate": 4.920348424571745e-05,
      "loss": 1.6447,
      "step": 65
    },
    {
      "epoch": 0.7446808510638298,
      "grad_norm": 4.25,
      "learning_rate": 4.873407364877535e-05,
      "loss": 1.5965,
      "step": 70
    },
    {
      "epoch": 0.7978723404255319,
      "grad_norm": 3.90625,
      "learning_rate": 4.8195179902048074e-05,
      "loss": 1.5848,
      "step": 75
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 4.21875,
      "learning_rate": 4.7588981731543924e-05,
      "loss": 1.5882,
      "step": 80
    },
    {
      "epoch": 0.9042553191489362,
      "grad_norm": 4.28125,
      "learning_rate": 4.691792997238264e-05,
      "loss": 1.5846,
      "step": 85
    },
    {
      "epoch": 0.9574468085106383,
      "grad_norm": 4.1875,
      "learning_rate": 4.618473766016631e-05,
      "loss": 1.5622,
      "step": 90
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.524075984954834,
      "eval_runtime": 25.1369,
      "eval_samples_per_second": 7.956,
      "eval_steps_per_second": 7.956,
      "step": 94
    },
    {
      "epoch": 1.0106382978723405,
      "grad_norm": 4.0,
      "learning_rate": 4.539236906228426e-05,
      "loss": 1.4786,
      "step": 95
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 4.28125,
      "learning_rate": 4.4544027693497756e-05,
      "loss": 1.2933,
      "step": 100
    },
    {
      "epoch": 1.1170212765957448,
      "grad_norm": 4.15625,
      "learning_rate": 4.364314336425706e-05,
      "loss": 1.2615,
      "step": 105
    },
    {
      "epoch": 1.1702127659574468,
      "grad_norm": 4.40625,
      "learning_rate": 4.269335831411411e-05,
      "loss": 1.2414,
      "step": 110
    },
    {
      "epoch": 1.2234042553191489,
      "grad_norm": 4.09375,
      "learning_rate": 4.1698512486292654e-05,
      "loss": 1.2464,
      "step": 115
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 4.1875,
      "learning_rate": 4.0662628002950374e-05,
      "loss": 1.2409,
      "step": 120
    },
    {
      "epoch": 1.3297872340425532,
      "grad_norm": 4.21875,
      "learning_rate": 3.958989290389881e-05,
      "loss": 1.2773,
      "step": 125
    },
    {
      "epoch": 1.3829787234042552,
      "grad_norm": 4.0,
      "learning_rate": 3.8484644214524803e-05,
      "loss": 1.2606,
      "step": 130
    },
    {
      "epoch": 1.4361702127659575,
      "grad_norm": 4.0,
      "learning_rate": 3.735135041136907e-05,
      "loss": 1.2387,
      "step": 135
    },
    {
      "epoch": 1.4893617021276595,
      "grad_norm": 4.0,
      "learning_rate": 3.6194593356253216e-05,
      "loss": 1.2252,
      "step": 140
    },
    {
      "epoch": 1.5425531914893615,
      "grad_norm": 3.875,
      "learning_rate": 3.5019049771994304e-05,
      "loss": 1.2471,
      "step": 145
    },
    {
      "epoch": 1.5957446808510638,
      "grad_norm": 4.0625,
      "learning_rate": 3.3829472334600364e-05,
      "loss": 1.2095,
      "step": 150
    },
    {
      "epoch": 1.648936170212766,
      "grad_norm": 3.984375,
      "learning_rate": 3.263067045839011e-05,
      "loss": 1.1903,
      "step": 155
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 3.765625,
      "learning_rate": 3.142749085172191e-05,
      "loss": 1.2119,
      "step": 160
    },
    {
      "epoch": 1.7553191489361701,
      "grad_norm": 3.859375,
      "learning_rate": 3.0224797921944423e-05,
      "loss": 1.2288,
      "step": 165
    },
    {
      "epoch": 1.8085106382978724,
      "grad_norm": 4.03125,
      "learning_rate": 2.9027454108791028e-05,
      "loss": 1.2019,
      "step": 170
    },
    {
      "epoch": 1.8617021276595744,
      "grad_norm": 4.15625,
      "learning_rate": 2.7840300225729064e-05,
      "loss": 1.2051,
      "step": 175
    },
    {
      "epoch": 1.9148936170212765,
      "grad_norm": 3.9375,
      "learning_rate": 2.6668135888743362e-05,
      "loss": 1.1788,
      "step": 180
    },
    {
      "epoch": 1.9680851063829787,
      "grad_norm": 3.828125,
      "learning_rate": 2.5515700111679485e-05,
      "loss": 1.1928,
      "step": 185
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.4084783792495728,
      "eval_runtime": 25.4568,
      "eval_samples_per_second": 7.856,
      "eval_steps_per_second": 7.856,
      "step": 188
    }
  ],
  "logging_steps": 5,
  "max_steps": 282,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.124053825880064e+17,
  "train_batch_size": 50,
  "trial_name": null,
  "trial_params": null
}
|
|