| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 620, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016145307769929364, |
| "grad_norm": 1.2564789264326033, |
| "learning_rate": 1.4516129032258066e-06, |
| "loss": 0.7453375816345215, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03229061553985873, |
| "grad_norm": 0.7821283299054873, |
| "learning_rate": 3.0645161290322584e-06, |
| "loss": 0.6848444461822509, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04843592330978809, |
| "grad_norm": 0.48911008950007523, |
| "learning_rate": 4.67741935483871e-06, |
| "loss": 0.6249249935150146, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06458123107971746, |
| "grad_norm": 0.48038005351170454, |
| "learning_rate": 6.290322580645162e-06, |
| "loss": 0.594818115234375, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08072653884964683, |
| "grad_norm": 0.5691753419661809, |
| "learning_rate": 7.903225806451613e-06, |
| "loss": 0.556916618347168, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09687184661957618, |
| "grad_norm": 0.4427583428156907, |
| "learning_rate": 9.516129032258065e-06, |
| "loss": 0.5476222991943359, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11301715438950555, |
| "grad_norm": 0.6279589261316931, |
| "learning_rate": 9.99611750215541e-06, |
| "loss": 0.5416336536407471, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12916246215943492, |
| "grad_norm": 0.4755087891144107, |
| "learning_rate": 9.977115699791622e-06, |
| "loss": 0.5427621841430664, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14530776992936428, |
| "grad_norm": 0.47753673953077186, |
| "learning_rate": 9.942341621640558e-06, |
| "loss": 0.5373227596282959, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.16145307769929365, |
| "grad_norm": 0.45055986756766414, |
| "learning_rate": 9.89190546533151e-06, |
| "loss": 0.5228934288024902, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17759838546922302, |
| "grad_norm": 0.4280290012967081, |
| "learning_rate": 9.825967060977933e-06, |
| "loss": 0.5259585380554199, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.19374369323915236, |
| "grad_norm": 1.3885595528460506, |
| "learning_rate": 9.744735364682347e-06, |
| "loss": 0.5162020683288574, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.20988900100908173, |
| "grad_norm": 0.46180163742899866, |
| "learning_rate": 9.648467796363019e-06, |
| "loss": 0.5223239898681641, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2260343087790111, |
| "grad_norm": 0.5854886166691778, |
| "learning_rate": 9.53746942400078e-06, |
| "loss": 0.5147815227508545, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.24217961654894046, |
| "grad_norm": 0.4453241177281989, |
| "learning_rate": 9.412091996891097e-06, |
| "loss": 0.5132325172424317, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.25832492431886983, |
| "grad_norm": 0.5292387781901823, |
| "learning_rate": 9.272732830964948e-06, |
| "loss": 0.5157772541046143, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.27447023208879917, |
| "grad_norm": 0.44718182092406705, |
| "learning_rate": 9.119833549710927e-06, |
| "loss": 0.5131439685821533, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.29061553985872857, |
| "grad_norm": 0.490885659513409, |
| "learning_rate": 8.953878684688492e-06, |
| "loss": 0.5093417644500733, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3067608476286579, |
| "grad_norm": 0.4132644245198082, |
| "learning_rate": 8.775394140067299e-06, |
| "loss": 0.5086756706237793, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3229061553985873, |
| "grad_norm": 0.4174212395756048, |
| "learning_rate": 8.584945526058426e-06, |
| "loss": 0.5185892105102539, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.33905146316851664, |
| "grad_norm": 0.4204703411929031, |
| "learning_rate": 8.383136366518788e-06, |
| "loss": 0.5063163757324218, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.35519677093844604, |
| "grad_norm": 0.5602588417199708, |
| "learning_rate": 8.170606186408756e-06, |
| "loss": 0.5080778121948242, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3713420787083754, |
| "grad_norm": 0.722448363378425, |
| "learning_rate": 7.948028485163744e-06, |
| "loss": 0.5074011802673339, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3874873864783047, |
| "grad_norm": 0.4692237397451838, |
| "learning_rate": 7.716108602402094e-06, |
| "loss": 0.4988128662109375, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4036326942482341, |
| "grad_norm": 0.4264064356657283, |
| "learning_rate": 7.475581482732717e-06, |
| "loss": 0.5008936405181885, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.41977800201816345, |
| "grad_norm": 0.44503110431079446, |
| "learning_rate": 7.2272093467457226e-06, |
| "loss": 0.5001473426818848, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.43592330978809285, |
| "grad_norm": 0.4451748109485531, |
| "learning_rate": 6.971779275566593e-06, |
| "loss": 0.5012603759765625, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4520686175580222, |
| "grad_norm": 0.4559698908675189, |
| "learning_rate": 6.710100716628345e-06, |
| "loss": 0.4997749328613281, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4682139253279516, |
| "grad_norm": 0.6917328470735025, |
| "learning_rate": 6.443002918565754e-06, |
| "loss": 0.5029205322265625, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4843592330978809, |
| "grad_norm": 0.4424890527067699, |
| "learning_rate": 6.171332303360411e-06, |
| "loss": 0.4997657299041748, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5005045408678103, |
| "grad_norm": 0.4720083796406712, |
| "learning_rate": 5.895949784064126e-06, |
| "loss": 0.49444894790649413, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5166498486377397, |
| "grad_norm": 0.39089223138014384, |
| "learning_rate": 5.617728036600734e-06, |
| "loss": 0.4930765151977539, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5327951564076691, |
| "grad_norm": 0.47595217764441355, |
| "learning_rate": 5.337548734291827e-06, |
| "loss": 0.48953914642333984, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5489404641775983, |
| "grad_norm": 0.4437674870169117, |
| "learning_rate": 5.0562997538701295e-06, |
| "loss": 0.4975291728973389, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5650857719475277, |
| "grad_norm": 0.44066493166516885, |
| "learning_rate": 4.7748723618344865e-06, |
| "loss": 0.4917713165283203, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5812310797174571, |
| "grad_norm": 0.5314417170919645, |
| "learning_rate": 4.49415839006284e-06, |
| "loss": 0.48806447982788087, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5973763874873865, |
| "grad_norm": 1.4947386266075393, |
| "learning_rate": 4.2150474096335356e-06, |
| "loss": 0.49144668579101564, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6135216952573158, |
| "grad_norm": 0.4350590241078372, |
| "learning_rate": 3.938423911811021e-06, |
| "loss": 0.4932772159576416, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6296670030272452, |
| "grad_norm": 0.4903885897812025, |
| "learning_rate": 3.6651645051292415e-06, |
| "loss": 0.48783349990844727, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6458123107971746, |
| "grad_norm": 0.6878893085423103, |
| "learning_rate": 3.3961351374551234e-06, |
| "loss": 0.49228334426879883, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6619576185671039, |
| "grad_norm": 0.41010549903336163, |
| "learning_rate": 3.132188351835232e-06, |
| "loss": 0.49144487380981444, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6781029263370333, |
| "grad_norm": 0.4191620209508489, |
| "learning_rate": 2.874160584821798e-06, |
| "loss": 0.493665075302124, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6942482341069627, |
| "grad_norm": 0.4637660042911083, |
| "learning_rate": 2.622869515839524e-06, |
| "loss": 0.487306547164917, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7103935418768921, |
| "grad_norm": 0.4032504407350088, |
| "learning_rate": 2.3791114759930013e-06, |
| "loss": 0.4938949108123779, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7265388496468214, |
| "grad_norm": 0.463632173922839, |
| "learning_rate": 2.1436589245260375e-06, |
| "loss": 0.482679557800293, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7426841574167508, |
| "grad_norm": 0.4455245146514806, |
| "learning_rate": 1.9172580009299735e-06, |
| "loss": 0.4972172737121582, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7588294651866802, |
| "grad_norm": 0.4313461560156946, |
| "learning_rate": 1.7006261604581725e-06, |
| "loss": 0.483304500579834, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7749747729566094, |
| "grad_norm": 0.40802500432486366, |
| "learning_rate": 1.4944499005397372e-06, |
| "loss": 0.4877010822296143, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7911200807265388, |
| "grad_norm": 0.48272456919743567, |
| "learning_rate": 1.2993825852972559e-06, |
| "loss": 0.487977123260498, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8072653884964682, |
| "grad_norm": 0.44816082724241857, |
| "learning_rate": 1.1160423750626693e-06, |
| "loss": 0.4939706325531006, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8234106962663976, |
| "grad_norm": 0.4192584603019428, |
| "learning_rate": 9.450102674524952e-07, |
| "loss": 0.49027338027954104, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8395560040363269, |
| "grad_norm": 0.5505560128103805, |
| "learning_rate": 7.868282562101819e-07, |
| "loss": 0.4772333145141602, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8557013118062563, |
| "grad_norm": 0.408392546206603, |
| "learning_rate": 6.419976136501377e-07, |
| "loss": 0.48678107261657716, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8718466195761857, |
| "grad_norm": 0.4719879078262765, |
| "learning_rate": 5.109773021462921e-07, |
| "loss": 0.4825289726257324, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.887991927346115, |
| "grad_norm": 0.45855836941006267, |
| "learning_rate": 3.941825196991378e-07, |
| "loss": 0.4801445007324219, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9041372351160444, |
| "grad_norm": 0.4384510920334218, |
| "learning_rate": 2.919833841902714e-07, |
| "loss": 0.4838963508605957, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.9202825428859738, |
| "grad_norm": 0.4203469347237003, |
| "learning_rate": 2.0470376049398944e-07, |
| "loss": 0.4862393379211426, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9364278506559032, |
| "grad_norm": 0.5986073034196603, |
| "learning_rate": 1.3262023416276414e-07, |
| "loss": 0.4833742618560791, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9525731584258325, |
| "grad_norm": 0.4395767849486276, |
| "learning_rate": 7.59612349389599e-08, |
| "loss": 0.48693051338195803, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9687184661957619, |
| "grad_norm": 0.6521570079176698, |
| "learning_rate": 3.4906312870331973e-08, |
| "loss": 0.4918181896209717, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9848637739656912, |
| "grad_norm": 0.42588128409358406, |
| "learning_rate": 9.585569323284915e-09, |
| "loss": 0.483614444732666, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.9356887206854803, |
| "learning_rate": 7.924469696718451e-11, |
| "loss": 0.492323112487793, |
| "step": 620 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 620, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 155, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4385102991169946e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|