{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.07399459839431721,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014798919678863443,
      "grad_norm": 3.318990468978882,
      "learning_rate": 3.6e-05,
      "loss": 0.8225,
      "step": 10
    },
    {
      "epoch": 0.0029597839357726886,
      "grad_norm": 1.644492745399475,
      "learning_rate": 7.6e-05,
      "loss": 0.2495,
      "step": 20
    },
    {
      "epoch": 0.004439675903659033,
      "grad_norm": 2.137453556060791,
      "learning_rate": 9.998250366089848e-05,
      "loss": 0.2076,
      "step": 30
    },
    {
      "epoch": 0.005919567871545377,
      "grad_norm": 1.6013213396072388,
      "learning_rate": 9.97858104436822e-05,
      "loss": 0.1753,
      "step": 40
    },
    {
      "epoch": 0.007399459839431722,
      "grad_norm": 2.4884767532348633,
      "learning_rate": 9.937141654477528e-05,
      "loss": 0.1661,
      "step": 50
    },
    {
      "epoch": 0.008879351807318065,
      "grad_norm": 1.5080770254135132,
      "learning_rate": 9.87411340032603e-05,
      "loss": 0.1414,
      "step": 60
    },
    {
      "epoch": 0.01035924377520441,
      "grad_norm": 1.3873870372772217,
      "learning_rate": 9.789771888432375e-05,
      "loss": 0.1409,
      "step": 70
    },
    {
      "epoch": 0.011839135743090754,
      "grad_norm": 1.1532950401306152,
      "learning_rate": 9.684485922768422e-05,
      "loss": 0.1169,
      "step": 80
    },
    {
      "epoch": 0.013319027710977099,
      "grad_norm": 1.088762640953064,
      "learning_rate": 9.558715892073323e-05,
      "loss": 0.1043,
      "step": 90
    },
    {
      "epoch": 0.014798919678863444,
      "grad_norm": 2.323737621307373,
      "learning_rate": 9.413011756690685e-05,
      "loss": 0.116,
      "step": 100
    },
    {
      "epoch": 0.016278811646749786,
      "grad_norm": 1.2114522457122803,
      "learning_rate": 9.248010643731935e-05,
      "loss": 0.1146,
      "step": 110
    },
    {
      "epoch": 0.01775870361463613,
      "grad_norm": 1.2140398025512695,
      "learning_rate": 9.064434061081562e-05,
      "loss": 0.1248,
      "step": 120
    },
    {
      "epoch": 0.019238595582522475,
      "grad_norm": 1.2787519693374634,
      "learning_rate": 8.863084742426719e-05,
      "loss": 0.0848,
      "step": 130
    },
    {
      "epoch": 0.02071848755040882,
      "grad_norm": 0.8212881684303284,
      "learning_rate": 8.644843137107059e-05,
      "loss": 0.0965,
      "step": 140
    },
    {
      "epoch": 0.022198379518295164,
      "grad_norm": 1.0979036092758179,
      "learning_rate": 8.410663560133784e-05,
      "loss": 0.0922,
      "step": 150
    },
    {
      "epoch": 0.02367827148618151,
      "grad_norm": 0.9978598356246948,
      "learning_rate": 8.161570019212921e-05,
      "loss": 0.0916,
      "step": 160
    },
    {
      "epoch": 0.025158163454067854,
      "grad_norm": 0.8716151714324951,
      "learning_rate": 7.898651737020166e-05,
      "loss": 0.067,
      "step": 170
    },
    {
      "epoch": 0.026638055421954198,
      "grad_norm": 1.0746519565582275,
      "learning_rate": 7.623058388307269e-05,
      "loss": 0.0802,
      "step": 180
    },
    {
      "epoch": 0.028117947389840543,
      "grad_norm": 1.3861006498336792,
      "learning_rate": 7.335995072666848e-05,
      "loss": 0.0907,
      "step": 190
    },
    {
      "epoch": 0.029597839357726887,
      "grad_norm": 1.093328833580017,
      "learning_rate": 7.038717044938519e-05,
      "loss": 0.0657,
      "step": 200
    },
    {
      "epoch": 0.03107773132561323,
      "grad_norm": 0.8141024708747864,
      "learning_rate": 6.732524226298841e-05,
      "loss": 0.0613,
      "step": 210
    },
    {
      "epoch": 0.03255762329349957,
      "grad_norm": 0.9468361139297485,
      "learning_rate": 6.418755520036775e-05,
      "loss": 0.0829,
      "step": 220
    },
    {
      "epoch": 0.03403751526138592,
      "grad_norm": 0.6965128183364868,
      "learning_rate": 6.0987829568702656e-05,
      "loss": 0.0796,
      "step": 230
    },
    {
      "epoch": 0.03551740722927226,
      "grad_norm": 1.127314567565918,
      "learning_rate": 5.7740056954050084e-05,
      "loss": 0.0798,
      "step": 240
    },
    {
      "epoch": 0.036997299197158606,
      "grad_norm": 1.4155848026275635,
      "learning_rate": 5.445843903969854e-05,
      "loss": 0.06,
      "step": 250
    },
    {
      "epoch": 0.03847719116504495,
      "grad_norm": 0.7054800987243652,
      "learning_rate": 5.1157325505820694e-05,
      "loss": 0.052,
      "step": 260
    },
    {
      "epoch": 0.039957083132931295,
      "grad_norm": 0.7660062909126282,
      "learning_rate": 4.785115128197298e-05,
      "loss": 0.059,
      "step": 270
    },
    {
      "epoch": 0.04143697510081764,
      "grad_norm": 0.7886701822280884,
      "learning_rate": 4.4554373426821374e-05,
      "loss": 0.0536,
      "step": 280
    },
    {
      "epoch": 0.042916867068703984,
      "grad_norm": 1.4434950351715088,
      "learning_rate": 4.1281407911102425e-05,
      "loss": 0.0628,
      "step": 290
    },
    {
      "epoch": 0.04439675903659033,
      "grad_norm": 0.9001641869544983,
      "learning_rate": 3.8046566580251e-05,
      "loss": 0.0536,
      "step": 300
    },
    {
      "epoch": 0.04587665100447667,
      "grad_norm": 0.7539929151535034,
      "learning_rate": 3.4863994572341843e-05,
      "loss": 0.0673,
      "step": 310
    },
    {
      "epoch": 0.04735654297236302,
      "grad_norm": 0.6108214855194092,
      "learning_rate": 3.1747608464999725e-05,
      "loss": 0.0358,
      "step": 320
    },
    {
      "epoch": 0.04883643494024936,
      "grad_norm": 0.4837505519390106,
      "learning_rate": 2.8711035421746367e-05,
      "loss": 0.041,
      "step": 330
    },
    {
      "epoch": 0.05031632690813571,
      "grad_norm": 0.8078829050064087,
      "learning_rate": 2.5767553603881767e-05,
      "loss": 0.0518,
      "step": 340
    },
    {
      "epoch": 0.05179621887602205,
      "grad_norm": 1.5803087949752808,
      "learning_rate": 2.29300341084631e-05,
      "loss": 0.0486,
      "step": 350
    },
    {
      "epoch": 0.053276110843908396,
      "grad_norm": 0.3863455355167389,
      "learning_rate": 2.0210884686272368e-05,
      "loss": 0.039,
      "step": 360
    },
    {
      "epoch": 0.05475600281179474,
      "grad_norm": 0.7085531949996948,
      "learning_rate": 1.7621995485879062e-05,
      "loss": 0.0389,
      "step": 370
    },
    {
      "epoch": 0.056235894779681085,
      "grad_norm": 0.7141103744506836,
      "learning_rate": 1.517468706104589e-05,
      "loss": 0.0425,
      "step": 380
    },
    {
      "epoch": 0.05771578674756743,
      "grad_norm": 0.6287118196487427,
      "learning_rate": 1.2879660868827508e-05,
      "loss": 0.0457,
      "step": 390
    },
    {
      "epoch": 0.059195678715453774,
      "grad_norm": 0.821704089641571,
      "learning_rate": 1.0746952474821614e-05,
      "loss": 0.0436,
      "step": 400
    },
    {
      "epoch": 0.06067557068334012,
      "grad_norm": 0.6595329642295837,
      "learning_rate": 8.785887670194138e-06,
      "loss": 0.0436,
      "step": 410
    },
    {
      "epoch": 0.06215546265122646,
      "grad_norm": 0.5961970090866089,
      "learning_rate": 7.005041692367154e-06,
      "loss": 0.0309,
      "step": 420
    },
    {
      "epoch": 0.06363535461911281,
      "grad_norm": 0.578628659248352,
      "learning_rate": 5.412201727687644e-06,
      "loss": 0.0402,
      "step": 430
    },
    {
      "epoch": 0.06511524658699915,
      "grad_norm": 0.48573747277259827,
      "learning_rate": 4.01433286004283e-06,
      "loss": 0.0338,
      "step": 440
    },
    {
      "epoch": 0.0665951385548855,
      "grad_norm": 0.54139244556427,
      "learning_rate": 2.817547614320615e-06,
      "loss": 0.0269,
      "step": 450
    },
    {
      "epoch": 0.06807503052277183,
      "grad_norm": 0.6104289293289185,
      "learning_rate": 1.8270792278934302e-06,
      "loss": 0.0328,
      "step": 460
    },
    {
      "epoch": 0.06955492249065819,
      "grad_norm": 0.40234851837158203,
      "learning_rate": 1.0472587670027678e-06,
      "loss": 0.0341,
      "step": 470
    },
    {
      "epoch": 0.07103481445854452,
      "grad_norm": 0.3963511288166046,
      "learning_rate": 4.814961881085045e-07,
      "loss": 0.0347,
      "step": 480
    },
    {
      "epoch": 0.07251470642643087,
      "grad_norm": 0.36650583148002625,
      "learning_rate": 1.3226542701689215e-07,
      "loss": 0.0401,
      "step": 490
    },
    {
      "epoch": 0.07399459839431721,
      "grad_norm": 0.48519328236579895,
      "learning_rate": 1.0935809887702154e-09,
      "loss": 0.0292,
      "step": 500
    },
    {
      "epoch": 0.07399459839431721,
      "step": 500,
      "total_flos": 0.0,
      "train_loss": 0.09187581622600556,
      "train_runtime": 831.2312,
      "train_samples_per_second": 4.812,
      "train_steps_per_second": 0.602
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}