{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 620, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016145307769929364, "grad_norm": 1.2564789264326033, "learning_rate": 1.4516129032258066e-06, "loss": 0.7453375816345215, "step": 10 }, { "epoch": 0.03229061553985873, "grad_norm": 0.7821283299054873, "learning_rate": 3.0645161290322584e-06, "loss": 0.6848444461822509, "step": 20 }, { "epoch": 0.04843592330978809, "grad_norm": 0.48911008950007523, "learning_rate": 4.67741935483871e-06, "loss": 0.6249249935150146, "step": 30 }, { "epoch": 0.06458123107971746, "grad_norm": 0.48038005351170454, "learning_rate": 6.290322580645162e-06, "loss": 0.594818115234375, "step": 40 }, { "epoch": 0.08072653884964683, "grad_norm": 0.5691753419661809, "learning_rate": 7.903225806451613e-06, "loss": 0.556916618347168, "step": 50 }, { "epoch": 0.09687184661957618, "grad_norm": 0.4427583428156907, "learning_rate": 9.516129032258065e-06, "loss": 0.5476222991943359, "step": 60 }, { "epoch": 0.11301715438950555, "grad_norm": 0.6279589261316931, "learning_rate": 9.99611750215541e-06, "loss": 0.5416336536407471, "step": 70 }, { "epoch": 0.12916246215943492, "grad_norm": 0.4755087891144107, "learning_rate": 9.977115699791622e-06, "loss": 0.5427621841430664, "step": 80 }, { "epoch": 0.14530776992936428, "grad_norm": 0.47753673953077186, "learning_rate": 9.942341621640558e-06, "loss": 0.5373227596282959, "step": 90 }, { "epoch": 0.16145307769929365, "grad_norm": 0.45055986756766414, "learning_rate": 9.89190546533151e-06, "loss": 0.5228934288024902, "step": 100 }, { "epoch": 0.17759838546922302, "grad_norm": 0.4280290012967081, "learning_rate": 9.825967060977933e-06, "loss": 0.5259585380554199, "step": 110 }, { "epoch": 0.19374369323915236, "grad_norm": 1.3885595528460506, "learning_rate": 9.744735364682347e-06, "loss": 0.5162020683288574, "step": 120 }, { "epoch": 0.20988900100908173, "grad_norm": 0.46180163742899866, "learning_rate": 9.648467796363019e-06, "loss": 0.5223239898681641, "step": 130 }, { "epoch": 0.2260343087790111, "grad_norm": 0.5854886166691778, "learning_rate": 9.53746942400078e-06, "loss": 0.5147815227508545, "step": 140 }, { "epoch": 0.24217961654894046, "grad_norm": 0.4453241177281989, "learning_rate": 9.412091996891097e-06, "loss": 0.5132325172424317, "step": 150 }, { "epoch": 0.25832492431886983, "grad_norm": 0.5292387781901823, "learning_rate": 9.272732830964948e-06, "loss": 0.5157772541046143, "step": 160 }, { "epoch": 0.27447023208879917, "grad_norm": 0.44718182092406705, "learning_rate": 9.119833549710927e-06, "loss": 0.5131439685821533, "step": 170 }, { "epoch": 0.29061553985872857, "grad_norm": 0.490885659513409, "learning_rate": 8.953878684688492e-06, "loss": 0.5093417644500733, "step": 180 }, { "epoch": 0.3067608476286579, "grad_norm": 0.4132644245198082, "learning_rate": 8.775394140067299e-06, "loss": 0.5086756706237793, "step": 190 }, { "epoch": 0.3229061553985873, "grad_norm": 0.4174212395756048, "learning_rate": 8.584945526058426e-06, "loss": 0.5185892105102539, "step": 200 }, { "epoch": 0.33905146316851664, "grad_norm": 0.4204703411929031, "learning_rate": 8.383136366518788e-06, "loss": 0.5063163757324218, "step": 210 }, { "epoch": 0.35519677093844604, "grad_norm": 0.5602588417199708, "learning_rate": 8.170606186408756e-06, "loss": 0.5080778121948242, "step": 220 }, { "epoch": 0.3713420787083754, "grad_norm": 0.722448363378425, "learning_rate": 7.948028485163744e-06, "loss": 0.5074011802673339, "step": 230 }, { "epoch": 0.3874873864783047, "grad_norm": 0.4692237397451838, "learning_rate": 7.716108602402094e-06, "loss": 0.4988128662109375, "step": 240 }, { "epoch": 0.4036326942482341, "grad_norm": 0.4264064356657283, "learning_rate": 7.475581482732717e-06, "loss": 0.5008936405181885, "step": 250 }, { "epoch": 0.41977800201816345, "grad_norm": 0.44503110431079446, "learning_rate": 7.2272093467457226e-06, "loss": 0.5001473426818848, "step": 260 }, { "epoch": 0.43592330978809285, "grad_norm": 0.4451748109485531, "learning_rate": 6.971779275566593e-06, "loss": 0.5012603759765625, "step": 270 }, { "epoch": 0.4520686175580222, "grad_norm": 0.4559698908675189, "learning_rate": 6.710100716628345e-06, "loss": 0.4997749328613281, "step": 280 }, { "epoch": 0.4682139253279516, "grad_norm": 0.6917328470735025, "learning_rate": 6.443002918565754e-06, "loss": 0.5029205322265625, "step": 290 }, { "epoch": 0.4843592330978809, "grad_norm": 0.4424890527067699, "learning_rate": 6.171332303360411e-06, "loss": 0.4997657299041748, "step": 300 }, { "epoch": 0.5005045408678103, "grad_norm": 0.4720083796406712, "learning_rate": 5.895949784064126e-06, "loss": 0.49444894790649413, "step": 310 }, { "epoch": 0.5166498486377397, "grad_norm": 0.39089223138014384, "learning_rate": 5.617728036600734e-06, "loss": 0.4930765151977539, "step": 320 }, { "epoch": 0.5327951564076691, "grad_norm": 0.47595217764441355, "learning_rate": 5.337548734291827e-06, "loss": 0.48953914642333984, "step": 330 }, { "epoch": 0.5489404641775983, "grad_norm": 0.4437674870169117, "learning_rate": 5.0562997538701295e-06, "loss": 0.4975291728973389, "step": 340 }, { "epoch": 0.5650857719475277, "grad_norm": 0.44066493166516885, "learning_rate": 4.7748723618344865e-06, "loss": 0.4917713165283203, "step": 350 }, { "epoch": 0.5812310797174571, "grad_norm": 0.5314417170919645, "learning_rate": 4.49415839006284e-06, "loss": 0.48806447982788087, "step": 360 }, { "epoch": 0.5973763874873865, "grad_norm": 1.4947386266075393, "learning_rate": 4.2150474096335356e-06, "loss": 0.49144668579101564, "step": 370 }, { "epoch": 0.6135216952573158, "grad_norm": 0.4350590241078372, "learning_rate": 3.938423911811021e-06, "loss": 0.4932772159576416, "step": 380 }, { "epoch": 0.6296670030272452, "grad_norm": 0.4903885897812025, "learning_rate": 3.6651645051292415e-06, "loss": 0.48783349990844727, "step": 390 }, { "epoch": 0.6458123107971746, "grad_norm": 0.6878893085423103, "learning_rate": 3.3961351374551234e-06, "loss": 0.49228334426879883, "step": 400 }, { "epoch": 0.6619576185671039, "grad_norm": 0.41010549903336163, "learning_rate": 3.132188351835232e-06, "loss": 0.49144487380981444, "step": 410 }, { "epoch": 0.6781029263370333, "grad_norm": 0.4191620209508489, "learning_rate": 2.874160584821798e-06, "loss": 0.493665075302124, "step": 420 }, { "epoch": 0.6942482341069627, "grad_norm": 0.4637660042911083, "learning_rate": 2.622869515839524e-06, "loss": 0.487306547164917, "step": 430 }, { "epoch": 0.7103935418768921, "grad_norm": 0.4032504407350088, "learning_rate": 2.3791114759930013e-06, "loss": 0.4938949108123779, "step": 440 }, { "epoch": 0.7265388496468214, "grad_norm": 0.463632173922839, "learning_rate": 2.1436589245260375e-06, "loss": 0.482679557800293, "step": 450 }, { "epoch": 0.7426841574167508, "grad_norm": 0.4455245146514806, "learning_rate": 1.9172580009299735e-06, "loss": 0.4972172737121582, "step": 460 }, { "epoch": 0.7588294651866802, "grad_norm": 0.4313461560156946, "learning_rate": 1.7006261604581725e-06, "loss": 0.483304500579834, "step": 470 }, { "epoch": 0.7749747729566094, "grad_norm": 0.40802500432486366, "learning_rate": 1.4944499005397372e-06, "loss": 0.4877010822296143, "step": 480 }, { "epoch": 0.7911200807265388, "grad_norm": 0.48272456919743567, "learning_rate": 1.2993825852972559e-06, "loss": 0.487977123260498, "step": 490 }, { "epoch": 0.8072653884964682, "grad_norm": 0.44816082724241857, "learning_rate": 1.1160423750626693e-06, "loss": 0.4939706325531006, "step": 500 }, { "epoch": 0.8234106962663976, "grad_norm": 0.4192584603019428, "learning_rate": 9.450102674524952e-07, "loss": 0.49027338027954104, "step": 510 }, { "epoch": 0.8395560040363269, "grad_norm": 0.5505560128103805, "learning_rate": 7.868282562101819e-07, "loss": 0.4772333145141602, "step": 520 }, { "epoch": 0.8557013118062563, "grad_norm": 0.408392546206603, "learning_rate": 6.419976136501377e-07, "loss": 0.48678107261657716, "step": 530 }, { "epoch": 0.8718466195761857, "grad_norm": 0.4719879078262765, "learning_rate": 5.109773021462921e-07, "loss": 0.4825289726257324, "step": 540 }, { "epoch": 0.887991927346115, "grad_norm": 0.45855836941006267, "learning_rate": 3.941825196991378e-07, "loss": 0.4801445007324219, "step": 550 }, { "epoch": 0.9041372351160444, "grad_norm": 0.4384510920334218, "learning_rate": 2.919833841902714e-07, "loss": 0.4838963508605957, "step": 560 }, { "epoch": 0.9202825428859738, "grad_norm": 0.4203469347237003, "learning_rate": 2.0470376049398944e-07, "loss": 0.4862393379211426, "step": 570 }, { "epoch": 0.9364278506559032, "grad_norm": 0.5986073034196603, "learning_rate": 1.3262023416276414e-07, "loss": 0.4833742618560791, "step": 580 }, { "epoch": 0.9525731584258325, "grad_norm": 0.4395767849486276, "learning_rate": 7.59612349389599e-08, "loss": 0.48693051338195803, "step": 590 }, { "epoch": 0.9687184661957619, "grad_norm": 0.6521570079176698, "learning_rate": 3.4906312870331973e-08, "loss": 0.4918181896209717, "step": 600 }, { "epoch": 0.9848637739656912, "grad_norm": 0.42588128409358406, "learning_rate": 9.585569323284915e-09, "loss": 0.483614444732666, "step": 610 }, { "epoch": 1.0, "grad_norm": 0.9356887206854803, "learning_rate": 7.924469696718451e-11, "loss": 0.492323112487793, "step": 620 } ], "logging_steps": 10, "max_steps": 620, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 155, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4385102991169946e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }