| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 14.0, |
| "eval_steps": 500, |
| "global_step": 6902, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2028397565922921, |
| "grad_norm": 5.140639781951904, |
| "learning_rate": 4.8744929006085194e-05, |
| "loss": 4.9269, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4056795131845842, |
| "grad_norm": 4.1806511878967285, |
| "learning_rate": 4.747718052738337e-05, |
| "loss": 2.5104, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6085192697768763, |
| "grad_norm": 5.7575225830078125, |
| "learning_rate": 4.6209432048681544e-05, |
| "loss": 2.3722, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8113590263691683, |
| "grad_norm": 4.265388011932373, |
| "learning_rate": 4.494168356997972e-05, |
| "loss": 2.2421, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0141987829614605, |
| "grad_norm": 4.239109039306641, |
| "learning_rate": 4.367393509127789e-05, |
| "loss": 2.1943, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2170385395537526, |
| "grad_norm": 3.487968921661377, |
| "learning_rate": 4.2406186612576066e-05, |
| "loss": 1.9779, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4198782961460445, |
| "grad_norm": 3.78326678276062, |
| "learning_rate": 4.113843813387424e-05, |
| "loss": 1.9127, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.6227180527383367, |
| "grad_norm": 3.727482318878174, |
| "learning_rate": 3.9870689655172416e-05, |
| "loss": 1.8781, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8255578093306288, |
| "grad_norm": 3.8917086124420166, |
| "learning_rate": 3.8602941176470595e-05, |
| "loss": 1.8345, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.028397565922921, |
| "grad_norm": 3.717226028442383, |
| "learning_rate": 3.733519269776877e-05, |
| "loss": 1.8036, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.231237322515213, |
| "grad_norm": 3.685636043548584, |
| "learning_rate": 3.606744421906694e-05, |
| "loss": 1.6346, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.4340770791075053, |
| "grad_norm": 3.44212007522583, |
| "learning_rate": 3.479969574036511e-05, |
| "loss": 1.6445, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.636916835699797, |
| "grad_norm": 5.615281105041504, |
| "learning_rate": 3.353194726166329e-05, |
| "loss": 1.6432, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.839756592292089, |
| "grad_norm": 3.544981002807617, |
| "learning_rate": 3.226419878296146e-05, |
| "loss": 1.6753, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.0425963488843815, |
| "grad_norm": 7.216954708099365, |
| "learning_rate": 3.099645030425964e-05, |
| "loss": 1.5979, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.2454361054766734, |
| "grad_norm": 5.732909202575684, |
| "learning_rate": 2.9728701825557807e-05, |
| "loss": 1.4675, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.4482758620689653, |
| "grad_norm": 4.3851847648620605, |
| "learning_rate": 2.8460953346855983e-05, |
| "loss": 1.4753, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.6511156186612577, |
| "grad_norm": 4.533299446105957, |
| "learning_rate": 2.719320486815416e-05, |
| "loss": 1.4698, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.8539553752535496, |
| "grad_norm": 4.276126384735107, |
| "learning_rate": 2.5925456389452336e-05, |
| "loss": 1.4802, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.056795131845842, |
| "grad_norm": 6.0317912101745605, |
| "learning_rate": 2.4657707910750508e-05, |
| "loss": 1.4538, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.259634888438134, |
| "grad_norm": 5.376238822937012, |
| "learning_rate": 2.3389959432048683e-05, |
| "loss": 1.3555, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.462474645030426, |
| "grad_norm": 3.486006259918213, |
| "learning_rate": 2.2122210953346855e-05, |
| "loss": 1.3671, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.665314401622718, |
| "grad_norm": 5.000717639923096, |
| "learning_rate": 2.085446247464503e-05, |
| "loss": 1.3378, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.8681541582150105, |
| "grad_norm": 4.060827732086182, |
| "learning_rate": 1.9586713995943205e-05, |
| "loss": 1.3609, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.070993914807302, |
| "grad_norm": 6.573349475860596, |
| "learning_rate": 1.831896551724138e-05, |
| "loss": 1.3327, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.273833671399594, |
| "grad_norm": 3.522782802581787, |
| "learning_rate": 1.7051217038539555e-05, |
| "loss": 1.2689, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.476673427991886, |
| "grad_norm": 4.277068138122559, |
| "learning_rate": 1.578346855983773e-05, |
| "loss": 1.2501, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.679513184584178, |
| "grad_norm": 3.7447307109832764, |
| "learning_rate": 1.4515720081135902e-05, |
| "loss": 1.2406, |
| "step": 2800 |
| }, |
| { |
| "epoch": 5.882352941176471, |
| "grad_norm": 3.9506561756134033, |
| "learning_rate": 1.3247971602434079e-05, |
| "loss": 1.2618, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.085192697768763, |
| "grad_norm": 7.182559490203857, |
| "learning_rate": 1.1980223123732253e-05, |
| "loss": 1.2339, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.288032454361055, |
| "grad_norm": 5.909596920013428, |
| "learning_rate": 1.0712474645030426e-05, |
| "loss": 1.1905, |
| "step": 3100 |
| }, |
| { |
| "epoch": 6.490872210953347, |
| "grad_norm": 5.5547194480896, |
| "learning_rate": 9.444726166328601e-06, |
| "loss": 1.1982, |
| "step": 3200 |
| }, |
| { |
| "epoch": 6.693711967545639, |
| "grad_norm": 4.923269271850586, |
| "learning_rate": 8.176977687626776e-06, |
| "loss": 1.1994, |
| "step": 3300 |
| }, |
| { |
| "epoch": 6.896551724137931, |
| "grad_norm": 4.390909194946289, |
| "learning_rate": 6.90922920892495e-06, |
| "loss": 1.2089, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.099391480730223, |
| "grad_norm": 4.504344940185547, |
| "learning_rate": 5.641480730223124e-06, |
| "loss": 1.1866, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.302231237322515, |
| "grad_norm": 4.870874404907227, |
| "learning_rate": 4.373732251521298e-06, |
| "loss": 1.1573, |
| "step": 3600 |
| }, |
| { |
| "epoch": 7.505070993914807, |
| "grad_norm": 4.071926593780518, |
| "learning_rate": 3.1059837728194726e-06, |
| "loss": 1.1305, |
| "step": 3700 |
| }, |
| { |
| "epoch": 7.707910750507099, |
| "grad_norm": 7.0253071784973145, |
| "learning_rate": 1.8382352941176471e-06, |
| "loss": 1.1313, |
| "step": 3800 |
| }, |
| { |
| "epoch": 7.910750507099391, |
| "grad_norm": 6.504556179046631, |
| "learning_rate": 5.704868154158215e-07, |
| "loss": 1.1481, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.113590263691684, |
| "grad_norm": 3.3532488346099854, |
| "learning_rate": 2.1030136192408e-05, |
| "loss": 1.1296, |
| "step": 4000 |
| }, |
| { |
| "epoch": 8.316430020283976, |
| "grad_norm": 5.533351898193359, |
| "learning_rate": 2.0305708490292668e-05, |
| "loss": 1.1625, |
| "step": 4100 |
| }, |
| { |
| "epoch": 8.519269776876268, |
| "grad_norm": 4.058775901794434, |
| "learning_rate": 1.958128078817734e-05, |
| "loss": 1.1552, |
| "step": 4200 |
| }, |
| { |
| "epoch": 8.72210953346856, |
| "grad_norm": 4.454073429107666, |
| "learning_rate": 1.8856853086062014e-05, |
| "loss": 1.184, |
| "step": 4300 |
| }, |
| { |
| "epoch": 8.924949290060852, |
| "grad_norm": 4.909153461456299, |
| "learning_rate": 1.8132425383946684e-05, |
| "loss": 1.202, |
| "step": 4400 |
| }, |
| { |
| "epoch": 9.127789046653144, |
| "grad_norm": 5.935628414154053, |
| "learning_rate": 1.7407997681831353e-05, |
| "loss": 1.1225, |
| "step": 4500 |
| }, |
| { |
| "epoch": 9.330628803245435, |
| "grad_norm": 3.5767104625701904, |
| "learning_rate": 1.6683569979716023e-05, |
| "loss": 1.0954, |
| "step": 4600 |
| }, |
| { |
| "epoch": 9.53346855983773, |
| "grad_norm": 4.276003360748291, |
| "learning_rate": 1.5959142277600696e-05, |
| "loss": 1.1231, |
| "step": 4700 |
| }, |
| { |
| "epoch": 9.736308316430021, |
| "grad_norm": 4.796773433685303, |
| "learning_rate": 1.5234714575485367e-05, |
| "loss": 1.0816, |
| "step": 4800 |
| }, |
| { |
| "epoch": 9.939148073022313, |
| "grad_norm": 5.43841028213501, |
| "learning_rate": 1.4510286873370038e-05, |
| "loss": 1.1061, |
| "step": 4900 |
| }, |
| { |
| "epoch": 10.141987829614605, |
| "grad_norm": 6.818022727966309, |
| "learning_rate": 1.378585917125471e-05, |
| "loss": 1.0616, |
| "step": 5000 |
| }, |
| { |
| "epoch": 10.344827586206897, |
| "grad_norm": 6.812931060791016, |
| "learning_rate": 1.3061431469139382e-05, |
| "loss": 1.0512, |
| "step": 5100 |
| }, |
| { |
| "epoch": 10.547667342799189, |
| "grad_norm": 7.301694393157959, |
| "learning_rate": 1.233700376702405e-05, |
| "loss": 1.0662, |
| "step": 5200 |
| }, |
| { |
| "epoch": 10.75050709939148, |
| "grad_norm": 4.213596820831299, |
| "learning_rate": 1.1612576064908723e-05, |
| "loss": 1.0638, |
| "step": 5300 |
| }, |
| { |
| "epoch": 10.953346855983773, |
| "grad_norm": 4.717465400695801, |
| "learning_rate": 1.0888148362793393e-05, |
| "loss": 1.0559, |
| "step": 5400 |
| }, |
| { |
| "epoch": 11.156186612576064, |
| "grad_norm": 5.471846103668213, |
| "learning_rate": 1.0163720660678066e-05, |
| "loss": 1.0143, |
| "step": 5500 |
| }, |
| { |
| "epoch": 11.359026369168356, |
| "grad_norm": 5.00101375579834, |
| "learning_rate": 9.439292958562735e-06, |
| "loss": 1.0048, |
| "step": 5600 |
| }, |
| { |
| "epoch": 11.561866125760648, |
| "grad_norm": 4.686493396759033, |
| "learning_rate": 8.714865256447408e-06, |
| "loss": 1.0124, |
| "step": 5700 |
| }, |
| { |
| "epoch": 11.764705882352942, |
| "grad_norm": 8.13379955291748, |
| "learning_rate": 7.990437554332078e-06, |
| "loss": 1.0094, |
| "step": 5800 |
| }, |
| { |
| "epoch": 11.967545638945234, |
| "grad_norm": 5.050856590270996, |
| "learning_rate": 7.266009852216748e-06, |
| "loss": 1.0281, |
| "step": 5900 |
| }, |
| { |
| "epoch": 12.170385395537526, |
| "grad_norm": 6.1310319900512695, |
| "learning_rate": 6.54158215010142e-06, |
| "loss": 0.9902, |
| "step": 6000 |
| }, |
| { |
| "epoch": 12.373225152129818, |
| "grad_norm": 4.342954635620117, |
| "learning_rate": 5.817154447986091e-06, |
| "loss": 0.9752, |
| "step": 6100 |
| }, |
| { |
| "epoch": 12.57606490872211, |
| "grad_norm": 4.584819316864014, |
| "learning_rate": 5.092726745870762e-06, |
| "loss": 1.0054, |
| "step": 6200 |
| }, |
| { |
| "epoch": 12.778904665314402, |
| "grad_norm": 4.735635757446289, |
| "learning_rate": 4.368299043755433e-06, |
| "loss": 0.9743, |
| "step": 6300 |
| }, |
| { |
| "epoch": 12.981744421906694, |
| "grad_norm": 6.229862213134766, |
| "learning_rate": 3.6438713416401046e-06, |
| "loss": 0.9504, |
| "step": 6400 |
| }, |
| { |
| "epoch": 13.184584178498985, |
| "grad_norm": 5.926666736602783, |
| "learning_rate": 2.919443639524776e-06, |
| "loss": 0.9525, |
| "step": 6500 |
| }, |
| { |
| "epoch": 13.387423935091277, |
| "grad_norm": 5.178487777709961, |
| "learning_rate": 2.1950159374094467e-06, |
| "loss": 0.9682, |
| "step": 6600 |
| }, |
| { |
| "epoch": 13.59026369168357, |
| "grad_norm": 4.677447319030762, |
| "learning_rate": 1.4705882352941177e-06, |
| "loss": 0.9458, |
| "step": 6700 |
| }, |
| { |
| "epoch": 13.793103448275861, |
| "grad_norm": 6.6742963790893555, |
| "learning_rate": 7.461605331787888e-07, |
| "loss": 0.9402, |
| "step": 6800 |
| }, |
| { |
| "epoch": 13.995943204868155, |
| "grad_norm": 4.439788341522217, |
| "learning_rate": 2.1732831063459866e-08, |
| "loss": 0.9464, |
| "step": 6900 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 6902, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 14, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1060236819072000.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|