{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 374,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02680965147453083,
      "grad_norm": 1.2595338821411133,
      "learning_rate": 2.553191489361702e-06,
      "loss": 1.2564,
      "step": 5
    },
    {
      "epoch": 0.05361930294906166,
      "grad_norm": 0.8211272954940796,
      "learning_rate": 5.74468085106383e-06,
      "loss": 1.2441,
      "step": 10
    },
    {
      "epoch": 0.08042895442359249,
      "grad_norm": 0.538611114025116,
      "learning_rate": 8.936170212765958e-06,
      "loss": 1.2797,
      "step": 15
    },
    {
      "epoch": 0.10723860589812333,
      "grad_norm": 0.5027714967727661,
      "learning_rate": 1.2127659574468084e-05,
      "loss": 1.2048,
      "step": 20
    },
    {
      "epoch": 0.13404825737265416,
      "grad_norm": 0.5756029486656189,
      "learning_rate": 1.531914893617021e-05,
      "loss": 1.1463,
      "step": 25
    },
    {
      "epoch": 0.16085790884718498,
      "grad_norm": 0.5522626042366028,
      "learning_rate": 1.8510638297872342e-05,
      "loss": 1.1146,
      "step": 30
    },
    {
      "epoch": 0.1876675603217158,
      "grad_norm": 0.46792927384376526,
      "learning_rate": 2.170212765957447e-05,
      "loss": 1.1107,
      "step": 35
    },
    {
      "epoch": 0.21447721179624665,
      "grad_norm": 0.50309157371521,
      "learning_rate": 2.4893617021276595e-05,
      "loss": 1.0734,
      "step": 40
    },
    {
      "epoch": 0.24128686327077747,
      "grad_norm": 0.5362275242805481,
      "learning_rate": 2.8085106382978723e-05,
      "loss": 1.1417,
      "step": 45
    },
    {
      "epoch": 0.2680965147453083,
      "grad_norm": 0.5180004835128784,
      "learning_rate": 2.9999624514425663e-05,
      "loss": 1.0767,
      "step": 50
    },
    {
      "epoch": 0.2949061662198391,
      "grad_norm": 0.549150824546814,
      "learning_rate": 2.9995400517601418e-05,
      "loss": 1.0173,
      "step": 55
    },
    {
      "epoch": 0.32171581769436997,
      "grad_norm": 0.5453264117240906,
      "learning_rate": 2.9986484493070226e-05,
      "loss": 1.0292,
      "step": 60
    },
    {
      "epoch": 0.3485254691689008,
      "grad_norm": 0.4556148946285248,
      "learning_rate": 2.9972879230636458e-05,
      "loss": 1.0466,
      "step": 65
    },
    {
      "epoch": 0.3753351206434316,
      "grad_norm": 0.5285739302635193,
      "learning_rate": 2.995458898735669e-05,
      "loss": 1.0229,
      "step": 70
    },
    {
      "epoch": 0.40214477211796246,
      "grad_norm": 0.5649049282073975,
      "learning_rate": 2.993161948620766e-05,
      "loss": 0.9187,
      "step": 75
    },
    {
      "epoch": 0.4289544235924933,
      "grad_norm": 0.48927125334739685,
      "learning_rate": 2.9903977914295546e-05,
      "loss": 0.9293,
      "step": 80
    },
    {
      "epoch": 0.45576407506702415,
      "grad_norm": 0.5317280292510986,
      "learning_rate": 2.9871672920607158e-05,
      "loss": 0.8919,
      "step": 85
    },
    {
      "epoch": 0.48257372654155495,
      "grad_norm": 0.6437996625900269,
      "learning_rate": 2.983471461330368e-05,
      "loss": 0.9002,
      "step": 90
    },
    {
      "epoch": 0.5093833780160858,
      "grad_norm": 0.5929898023605347,
      "learning_rate": 2.9793114556557854e-05,
      "loss": 0.8927,
      "step": 95
    },
    {
      "epoch": 0.5361930294906166,
      "grad_norm": 0.6423819065093994,
      "learning_rate": 2.9746885766935564e-05,
      "loss": 0.8444,
      "step": 100
    },
    {
      "epoch": 0.5630026809651475,
      "grad_norm": 0.6655325889587402,
      "learning_rate": 2.9696042709322998e-05,
      "loss": 0.8668,
      "step": 105
    },
    {
      "epoch": 0.5898123324396782,
      "grad_norm": 0.738153874874115,
      "learning_rate": 2.96406012924006e-05,
      "loss": 0.861,
      "step": 110
    },
    {
      "epoch": 0.6166219839142091,
      "grad_norm": 0.8325274586677551,
      "learning_rate": 2.9580578863665296e-05,
      "loss": 0.8202,
      "step": 115
    },
    {
      "epoch": 0.6434316353887399,
      "grad_norm": 0.7488914132118225,
      "learning_rate": 2.9515994204002485e-05,
      "loss": 0.7684,
      "step": 120
    },
    {
      "epoch": 0.6702412868632708,
      "grad_norm": 0.7176592350006104,
      "learning_rate": 2.944686752180955e-05,
      "loss": 0.8053,
      "step": 125
    },
    {
      "epoch": 0.6970509383378016,
      "grad_norm": 0.8424944281578064,
      "learning_rate": 2.9373220446672687e-05,
      "loss": 0.7371,
      "step": 130
    },
    {
      "epoch": 0.7238605898123325,
      "grad_norm": 0.7524424195289612,
      "learning_rate": 2.929507602259908e-05,
      "loss": 0.7762,
      "step": 135
    },
    {
      "epoch": 0.7506702412868632,
      "grad_norm": 0.8609856367111206,
      "learning_rate": 2.9212458700806445e-05,
      "loss": 0.7208,
      "step": 140
    },
    {
      "epoch": 0.7774798927613941,
      "grad_norm": 0.8293251991271973,
      "learning_rate": 2.912539433207233e-05,
      "loss": 0.7078,
      "step": 145
    },
    {
      "epoch": 0.8042895442359249,
      "grad_norm": 0.9803098440170288,
      "learning_rate": 2.9033910158645433e-05,
      "loss": 0.6701,
      "step": 150
    },
    {
      "epoch": 0.8310991957104558,
      "grad_norm": 0.8112236261367798,
      "learning_rate": 2.8938034805721598e-05,
      "loss": 0.6631,
      "step": 155
    },
    {
      "epoch": 0.8579088471849866,
      "grad_norm": 0.8429144620895386,
      "learning_rate": 2.883779827248703e-05,
      "loss": 0.6774,
      "step": 160
    },
    {
      "epoch": 0.8847184986595175,
      "grad_norm": 0.9313247203826904,
      "learning_rate": 2.873323192273162e-05,
      "loss": 0.6536,
      "step": 165
    },
    {
      "epoch": 0.9115281501340483,
      "grad_norm": 0.8465601205825806,
      "learning_rate": 2.8624368475035283e-05,
      "loss": 0.6697,
      "step": 170
    },
    {
      "epoch": 0.938337801608579,
      "grad_norm": 1.0963940620422363,
      "learning_rate": 2.8511241992530403e-05,
      "loss": 0.6171,
      "step": 175
    },
    {
      "epoch": 0.9651474530831099,
      "grad_norm": 0.9651685357093811,
      "learning_rate": 2.8393887872243528e-05,
      "loss": 0.6423,
      "step": 180
    },
    {
      "epoch": 0.9919571045576407,
      "grad_norm": 0.8953330516815186,
      "learning_rate": 2.8272342834019738e-05,
      "loss": 0.5673,
      "step": 185
    },
    {
      "epoch": 1.0160857908847185,
      "grad_norm": 1.117424726486206,
      "learning_rate": 2.814664490903309e-05,
      "loss": 0.5467,
      "step": 190
    },
    {
      "epoch": 1.0428954423592494,
      "grad_norm": 0.8842881917953491,
      "learning_rate": 2.801683342788671e-05,
      "loss": 0.5333,
      "step": 195
    },
    {
      "epoch": 1.0697050938337802,
      "grad_norm": 1.0800713300704956,
      "learning_rate": 2.7882949008306392e-05,
      "loss": 0.5203,
      "step": 200
    },
    {
      "epoch": 1.096514745308311,
      "grad_norm": 1.0497684478759766,
      "learning_rate": 2.7745033542431358e-05,
      "loss": 0.48,
      "step": 205
    },
    {
      "epoch": 1.123324396782842,
      "grad_norm": 1.2430580854415894,
      "learning_rate": 2.7603130183706317e-05,
      "loss": 0.4952,
      "step": 210
    },
    {
      "epoch": 1.1501340482573728,
      "grad_norm": 0.9467598795890808,
      "learning_rate": 2.7457283333378838e-05,
      "loss": 0.4694,
      "step": 215
    },
    {
      "epoch": 1.1769436997319036,
      "grad_norm": 1.2572771310806274,
      "learning_rate": 2.7307538626606314e-05,
      "loss": 0.486,
      "step": 220
    },
    {
      "epoch": 1.2037533512064342,
      "grad_norm": 1.1049180030822754,
      "learning_rate": 2.71539429181768e-05,
      "loss": 0.5286,
      "step": 225
    },
    {
      "epoch": 1.230563002680965,
      "grad_norm": 1.1196273565292358,
      "learning_rate": 2.699654426784828e-05,
      "loss": 0.4741,
      "step": 230
    },
    {
      "epoch": 1.257372654155496,
      "grad_norm": 1.078421711921692,
      "learning_rate": 2.6835391925310842e-05,
      "loss": 0.446,
      "step": 235
    },
    {
      "epoch": 1.2841823056300268,
      "grad_norm": 1.0374311208724976,
      "learning_rate": 2.6670536314776593e-05,
      "loss": 0.4533,
      "step": 240
    },
    {
      "epoch": 1.3109919571045576,
      "grad_norm": 1.030749797821045,
      "learning_rate": 2.6502029019202007e-05,
      "loss": 0.444,
      "step": 245
    },
    {
      "epoch": 1.3378016085790885,
      "grad_norm": 1.2583510875701904,
      "learning_rate": 2.632992276414775e-05,
      "loss": 0.4296,
      "step": 250
    },
    {
      "epoch": 1.3646112600536193,
      "grad_norm": 1.0695934295654297,
      "learning_rate": 2.615427140128096e-05,
      "loss": 0.4416,
      "step": 255
    },
    {
      "epoch": 1.3914209115281502,
      "grad_norm": 1.1195570230484009,
      "learning_rate": 2.5975129891525173e-05,
      "loss": 0.431,
      "step": 260
    },
    {
      "epoch": 1.418230563002681,
      "grad_norm": 1.295849084854126,
      "learning_rate": 2.5792554287863197e-05,
      "loss": 0.3968,
      "step": 265
    },
    {
      "epoch": 1.4450402144772119,
      "grad_norm": 1.0494529008865356,
      "learning_rate": 2.5606601717798212e-05,
      "loss": 0.41,
      "step": 270
    },
    {
      "epoch": 1.4718498659517425,
      "grad_norm": 1.0004347562789917,
      "learning_rate": 2.541733036547876e-05,
      "loss": 0.41,
      "step": 275
    },
    {
      "epoch": 1.4986595174262733,
      "grad_norm": 1.2617030143737793,
      "learning_rate": 2.5224799453492993e-05,
      "loss": 0.4061,
      "step": 280
    },
    {
      "epoch": 1.5254691689008042,
      "grad_norm": 1.0547535419464111,
      "learning_rate": 2.5029069224338105e-05,
      "loss": 0.4104,
      "step": 285
    },
    {
      "epoch": 1.552278820375335,
      "grad_norm": 1.1151704788208008,
      "learning_rate": 2.4830200921570557e-05,
      "loss": 0.4109,
      "step": 290
    },
    {
      "epoch": 1.579088471849866,
      "grad_norm": 1.1136678457260132,
      "learning_rate": 2.4628256770643114e-05,
      "loss": 0.4338,
      "step": 295
    },
    {
      "epoch": 1.6058981233243967,
      "grad_norm": 1.028518795967102,
      "learning_rate": 2.4423299959434637e-05,
      "loss": 0.353,
      "step": 300
    },
    {
      "epoch": 1.6327077747989276,
      "grad_norm": 1.1373231410980225,
      "learning_rate": 2.421539461847874e-05,
      "loss": 0.3443,
      "step": 305
    },
    {
      "epoch": 1.6595174262734584,
      "grad_norm": 0.9851738810539246,
      "learning_rate": 2.4004605800897486e-05,
      "loss": 0.3587,
      "step": 310
    },
    {
      "epoch": 1.6863270777479893,
      "grad_norm": 1.0971182584762573,
      "learning_rate": 2.3790999462046397e-05,
      "loss": 0.3598,
      "step": 315
    },
    {
      "epoch": 1.7131367292225201,
      "grad_norm": 1.130505919456482,
      "learning_rate": 2.3574642438877183e-05,
      "loss": 0.3877,
      "step": 320
    },
    {
      "epoch": 1.739946380697051,
      "grad_norm": 1.1338894367218018,
      "learning_rate": 2.3355602429024606e-05,
      "loss": 0.3538,
      "step": 325
    },
    {
      "epoch": 1.7667560321715818,
      "grad_norm": 1.183328628540039,
      "learning_rate": 2.313394796962403e-05,
      "loss": 0.3291,
      "step": 330
    },
    {
      "epoch": 1.7935656836461127,
      "grad_norm": 1.1300513744354248,
      "learning_rate": 2.2909748415866312e-05,
      "loss": 0.3378,
      "step": 335
    },
    {
      "epoch": 1.8203753351206435,
      "grad_norm": 1.3838099241256714,
      "learning_rate": 2.2683073919296715e-05,
      "loss": 0.3279,
      "step": 340
    },
    {
      "epoch": 1.8471849865951744,
      "grad_norm": 1.07172429561615,
      "learning_rate": 2.245399540586464e-05,
      "loss": 0.3473,
      "step": 345
    },
    {
      "epoch": 1.8739946380697052,
      "grad_norm": 1.7059202194213867,
      "learning_rate": 2.2222584553731058e-05,
      "loss": 0.3221,
      "step": 350
    },
    {
      "epoch": 1.900804289544236,
      "grad_norm": 1.2840522527694702,
      "learning_rate": 2.1988913770840563e-05,
      "loss": 0.348,
      "step": 355
    },
    {
      "epoch": 1.9276139410187667,
      "grad_norm": 1.2816296815872192,
      "learning_rate": 2.17530561722651e-05,
      "loss": 0.3025,
      "step": 360
    },
    {
      "epoch": 1.9544235924932976,
      "grad_norm": 1.3507882356643677,
      "learning_rate": 2.1515085557326404e-05,
      "loss": 0.2958,
      "step": 365
    },
    {
      "epoch": 1.9812332439678284,
      "grad_norm": 1.867775559425354,
      "learning_rate": 2.127507638650437e-05,
      "loss": 0.3157,
      "step": 370
    }
  ],
  "logging_steps": 5,
  "max_steps": 935,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.810805485128909e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}