| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 280, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03571428571428571, |
| "grad_norm": 3.890625, |
| "learning_rate": 4.245298790601447e-06, |
| "loss": 0.3097, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 3.296875, |
| "learning_rate": 9.551922278853254e-06, |
| "loss": 0.2284, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10714285714285714, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.4858545767105065e-05, |
| "loss": 0.1276, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.345703125, |
| "learning_rate": 2.0165169255356872e-05, |
| "loss": 0.0914, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17857142857142858, |
| "grad_norm": 0.279296875, |
| "learning_rate": 2.5471792743608676e-05, |
| "loss": 0.0793, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.337890625, |
| "learning_rate": 3.0778416231860484e-05, |
| "loss": 0.0746, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.357421875, |
| "learning_rate": 3.6085039720112295e-05, |
| "loss": 0.0714, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.234375, |
| "learning_rate": 4.1391663208364106e-05, |
| "loss": 0.0687, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32142857142857145, |
| "grad_norm": 0.671875, |
| "learning_rate": 4.669828669661591e-05, |
| "loss": 0.0697, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.2236328125, |
| "learning_rate": 5.200491018486772e-05, |
| "loss": 0.0656, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39285714285714285, |
| "grad_norm": 0.296875, |
| "learning_rate": 5.305898621472991e-05, |
| "loss": 0.0638, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.478515625, |
| "learning_rate": 5.302954604378656e-05, |
| "loss": 0.0644, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4642857142857143, |
| "grad_norm": 0.328125, |
| "learning_rate": 5.2977489232247505e-05, |
| "loss": 0.0661, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.28125, |
| "learning_rate": 5.290286515915174e-05, |
| "loss": 0.0633, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 0.28515625, |
| "learning_rate": 5.280574460995282e-05, |
| "loss": 0.0626, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.462890625, |
| "learning_rate": 5.2686219709374565e-05, |
| "loss": 0.0641, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6071428571428571, |
| "grad_norm": 0.482421875, |
| "learning_rate": 5.2544403834025184e-05, |
| "loss": 0.0631, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.2421875, |
| "learning_rate": 5.2380431504852706e-05, |
| "loss": 0.0617, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6785714285714286, |
| "grad_norm": 0.2734375, |
| "learning_rate": 5.21944582595437e-05, |
| "loss": 0.061, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.25390625, |
| "learning_rate": 5.198666050498633e-05, |
| "loss": 0.0607, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.228515625, |
| "learning_rate": 5.175723534993779e-05, |
| "loss": 0.0608, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 5.1506400418054675e-05, |
| "loss": 0.0614, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8214285714285714, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 5.123439364146377e-05, |
| "loss": 0.0622, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 5.0941473035069013e-05, |
| "loss": 0.0613, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8928571428571429, |
| "grad_norm": 0.2265625, |
| "learning_rate": 5.062791645180871e-05, |
| "loss": 0.059, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 5.0294021319095255e-05, |
| "loss": 0.059, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9642857142857143, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 4.994010435668713e-05, |
| "loss": 0.058, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.1943359375, |
| "learning_rate": 4.95665012762611e-05, |
| "loss": 0.0593, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.059822212904691696, |
| "eval_runtime": 1.242, |
| "eval_samples_per_second": 19.324, |
| "eval_steps_per_second": 19.324, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0357142857142858, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 4.9173566462969296e-05, |
| "loss": 0.0512, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.2490234375, |
| "learning_rate": 4.8761672639283475e-05, |
| "loss": 0.0511, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1071428571428572, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 4.833121051144515e-05, |
| "loss": 0.051, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.318359375, |
| "learning_rate": 4.78825883988571e-05, |
| "loss": 0.0517, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1785714285714286, |
| "grad_norm": 0.306640625, |
| "learning_rate": 4.741623184676759e-05, |
| "loss": 0.052, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.2142857142857142, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 4.6932583222615036e-05, |
| "loss": 0.0529, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 4.643210129641562e-05, |
| "loss": 0.0513, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.2275390625, |
| "learning_rate": 4.5915260805592166e-05, |
| "loss": 0.0508, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.3214285714285714, |
| "grad_norm": 0.2373046875, |
| "learning_rate": 4.538255200465694e-05, |
| "loss": 0.0514, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.3571428571428572, |
| "grad_norm": 0.224609375, |
| "learning_rate": 4.4834480200175554e-05, |
| "loss": 0.0523, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.3928571428571428, |
| "grad_norm": 0.298828125, |
| "learning_rate": 4.427156527145303e-05, |
| "loss": 0.0513, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 4.369434117739689e-05, |
| "loss": 0.051, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4642857142857144, |
| "grad_norm": 0.318359375, |
| "learning_rate": 4.3103355450024676e-05, |
| "loss": 0.052, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.267578125, |
| "learning_rate": 4.249916867509674e-05, |
| "loss": 0.0526, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5357142857142856, |
| "grad_norm": 0.2255859375, |
| "learning_rate": 4.1882353960366676e-05, |
| "loss": 0.0524, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 4.125349639195389e-05, |
| "loss": 0.051, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.6071428571428572, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 4.061319247935396e-05, |
| "loss": 0.0492, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.6428571428571428, |
| "grad_norm": 0.220703125, |
| "learning_rate": 3.9962049589613314e-05, |
| "loss": 0.0521, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.6785714285714286, |
| "grad_norm": 0.259765625, |
| "learning_rate": 3.930068537120471e-05, |
| "loss": 0.0514, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.21484375, |
| "learning_rate": 3.86297271681503e-05, |
| "loss": 0.0518, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.1962890625, |
| "learning_rate": 3.794981142494782e-05, |
| "loss": 0.0517, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 3.726158308286441e-05, |
| "loss": 0.0502, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8214285714285714, |
| "grad_norm": 0.21484375, |
| "learning_rate": 3.656569496817077e-05, |
| "loss": 0.0511, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 3.586280717289592e-05, |
| "loss": 0.0527, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.8928571428571428, |
| "grad_norm": 0.2216796875, |
| "learning_rate": 3.5153586428689876e-05, |
| "loss": 0.0506, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.9285714285714286, |
| "grad_norm": 0.1826171875, |
| "learning_rate": 3.443870547438835e-05, |
| "loss": 0.0513, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9642857142857144, |
| "grad_norm": 0.244140625, |
| "learning_rate": 3.371884241787916e-05, |
| "loss": 0.0515, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.185546875, |
| "learning_rate": 3.299468009287586e-05, |
| "loss": 0.0509, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.05891813337802887, |
| "eval_runtime": 1.1371, |
| "eval_samples_per_second": 21.106, |
| "eval_steps_per_second": 21.106, |
| "step": 280 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 560, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.50841030950912e+17, |
| "train_batch_size": 100, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|