{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.688172043010753,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.053763440860215055,
      "grad_norm": 3.0181329250335693,
      "learning_rate": 3.6e-05,
      "loss": 0.6001,
      "step": 10
    },
    {
      "epoch": 0.10752688172043011,
      "grad_norm": 1.7442196607589722,
      "learning_rate": 7.6e-05,
      "loss": 0.2891,
      "step": 20
    },
    {
      "epoch": 0.16129032258064516,
      "grad_norm": 1.5924317836761475,
      "learning_rate": 9.998250366089848e-05,
      "loss": 0.2614,
      "step": 30
    },
    {
      "epoch": 0.21505376344086022,
      "grad_norm": 1.3713581562042236,
      "learning_rate": 9.97858104436822e-05,
      "loss": 0.2364,
      "step": 40
    },
    {
      "epoch": 0.26881720430107525,
      "grad_norm": 1.3055821657180786,
      "learning_rate": 9.937141654477528e-05,
      "loss": 0.1779,
      "step": 50
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 0.9266240000724792,
      "learning_rate": 9.87411340032603e-05,
      "loss": 0.1571,
      "step": 60
    },
    {
      "epoch": 0.3763440860215054,
      "grad_norm": 1.0376759767532349,
      "learning_rate": 9.789771888432375e-05,
      "loss": 0.137,
      "step": 70
    },
    {
      "epoch": 0.43010752688172044,
      "grad_norm": 0.7397834062576294,
      "learning_rate": 9.684485922768422e-05,
      "loss": 0.1182,
      "step": 80
    },
    {
      "epoch": 0.4838709677419355,
      "grad_norm": 0.5904547572135925,
      "learning_rate": 9.558715892073323e-05,
      "loss": 0.1046,
      "step": 90
    },
    {
      "epoch": 0.5376344086021505,
      "grad_norm": 1.022401213645935,
      "learning_rate": 9.413011756690685e-05,
      "loss": 0.1056,
      "step": 100
    },
    {
      "epoch": 0.5913978494623656,
      "grad_norm": 0.7747468948364258,
      "learning_rate": 9.248010643731935e-05,
      "loss": 0.1006,
      "step": 110
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 0.8032405972480774,
      "learning_rate": 9.064434061081562e-05,
      "loss": 0.0902,
      "step": 120
    },
    {
      "epoch": 0.6989247311827957,
      "grad_norm": 0.6915440559387207,
      "learning_rate": 8.863084742426719e-05,
      "loss": 0.0861,
      "step": 130
    },
    {
      "epoch": 0.7526881720430108,
      "grad_norm": 0.7343285083770752,
      "learning_rate": 8.644843137107059e-05,
      "loss": 0.0873,
      "step": 140
    },
    {
      "epoch": 0.8064516129032258,
      "grad_norm": 0.6028986573219299,
      "learning_rate": 8.410663560133784e-05,
      "loss": 0.0831,
      "step": 150
    },
    {
      "epoch": 0.8602150537634409,
      "grad_norm": 0.5441303253173828,
      "learning_rate": 8.161570019212921e-05,
      "loss": 0.0772,
      "step": 160
    },
    {
      "epoch": 0.9139784946236559,
      "grad_norm": 0.5184003114700317,
      "learning_rate": 7.898651737020166e-05,
      "loss": 0.0739,
      "step": 170
    },
    {
      "epoch": 0.967741935483871,
      "grad_norm": 0.38568446040153503,
      "learning_rate": 7.623058388307269e-05,
      "loss": 0.0729,
      "step": 180
    },
    {
      "epoch": 1.021505376344086,
      "grad_norm": 0.5226582288742065,
      "learning_rate": 7.335995072666848e-05,
      "loss": 0.0733,
      "step": 190
    },
    {
      "epoch": 1.075268817204301,
      "grad_norm": 0.6022420525550842,
      "learning_rate": 7.038717044938519e-05,
      "loss": 0.0773,
      "step": 200
    },
    {
      "epoch": 1.129032258064516,
      "grad_norm": 0.6212151646614075,
      "learning_rate": 6.732524226298841e-05,
      "loss": 0.0684,
      "step": 210
    },
    {
      "epoch": 1.1827956989247312,
      "grad_norm": 0.5427640080451965,
      "learning_rate": 6.418755520036775e-05,
      "loss": 0.0678,
      "step": 220
    },
    {
      "epoch": 1.2365591397849462,
      "grad_norm": 0.6625052690505981,
      "learning_rate": 6.0987829568702656e-05,
      "loss": 0.073,
      "step": 230
    },
    {
      "epoch": 1.2903225806451613,
      "grad_norm": 0.37876975536346436,
      "learning_rate": 5.7740056954050084e-05,
      "loss": 0.0684,
      "step": 240
    },
    {
      "epoch": 1.3440860215053765,
      "grad_norm": 0.3789575397968292,
      "learning_rate": 5.445843903969854e-05,
      "loss": 0.0573,
      "step": 250
    },
    {
      "epoch": 1.3978494623655915,
      "grad_norm": 0.380149245262146,
      "learning_rate": 5.1157325505820694e-05,
      "loss": 0.0618,
      "step": 260
    },
    {
      "epoch": 1.4516129032258065,
      "grad_norm": 0.4945651888847351,
      "learning_rate": 4.785115128197298e-05,
      "loss": 0.0632,
      "step": 270
    },
    {
      "epoch": 1.5053763440860215,
      "grad_norm": 0.47128596901893616,
      "learning_rate": 4.4554373426821374e-05,
      "loss": 0.0619,
      "step": 280
    },
    {
      "epoch": 1.5591397849462365,
      "grad_norm": 0.3431663513183594,
      "learning_rate": 4.1281407911102425e-05,
      "loss": 0.0605,
      "step": 290
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 0.3071378171443939,
      "learning_rate": 3.8046566580251e-05,
      "loss": 0.0539,
      "step": 300
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.25178495049476624,
      "learning_rate": 3.4863994572341843e-05,
      "loss": 0.0547,
      "step": 310
    },
    {
      "epoch": 1.7204301075268817,
      "grad_norm": 0.3284253478050232,
      "learning_rate": 3.1747608464999725e-05,
      "loss": 0.0558,
      "step": 320
    },
    {
      "epoch": 1.7741935483870968,
      "grad_norm": 0.40466904640197754,
      "learning_rate": 2.8711035421746367e-05,
      "loss": 0.0519,
      "step": 330
    },
    {
      "epoch": 1.827956989247312,
      "grad_norm": 0.3861023485660553,
      "learning_rate": 2.5767553603881767e-05,
      "loss": 0.0534,
      "step": 340
    },
    {
      "epoch": 1.881720430107527,
      "grad_norm": 0.4377838671207428,
      "learning_rate": 2.29300341084631e-05,
      "loss": 0.0507,
      "step": 350
    },
    {
      "epoch": 1.935483870967742,
      "grad_norm": 0.3389015197753906,
      "learning_rate": 2.0210884686272368e-05,
      "loss": 0.0533,
      "step": 360
    },
    {
      "epoch": 1.989247311827957,
      "grad_norm": 0.2521311938762665,
      "learning_rate": 1.7621995485879062e-05,
      "loss": 0.0536,
      "step": 370
    },
    {
      "epoch": 2.043010752688172,
      "grad_norm": 0.30193665623664856,
      "learning_rate": 1.517468706104589e-05,
      "loss": 0.0562,
      "step": 380
    },
    {
      "epoch": 2.096774193548387,
      "grad_norm": 0.3276124596595764,
      "learning_rate": 1.2879660868827508e-05,
      "loss": 0.0497,
      "step": 390
    },
    {
      "epoch": 2.150537634408602,
      "grad_norm": 0.2738921642303467,
      "learning_rate": 1.0746952474821614e-05,
      "loss": 0.0505,
      "step": 400
    },
    {
      "epoch": 2.204301075268817,
      "grad_norm": 0.23493026196956635,
      "learning_rate": 8.785887670194138e-06,
      "loss": 0.05,
      "step": 410
    },
    {
      "epoch": 2.258064516129032,
      "grad_norm": 0.27332600951194763,
      "learning_rate": 7.005041692367154e-06,
      "loss": 0.0478,
      "step": 420
    },
    {
      "epoch": 2.3118279569892475,
      "grad_norm": 0.23733267188072205,
      "learning_rate": 5.412201727687644e-06,
      "loss": 0.0485,
      "step": 430
    },
    {
      "epoch": 2.3655913978494625,
      "grad_norm": 0.3148542046546936,
      "learning_rate": 4.01433286004283e-06,
      "loss": 0.0486,
      "step": 440
    },
    {
      "epoch": 2.4193548387096775,
      "grad_norm": 0.17621102929115295,
      "learning_rate": 2.817547614320615e-06,
      "loss": 0.0459,
      "step": 450
    },
    {
      "epoch": 2.4731182795698925,
      "grad_norm": 0.3627742528915405,
      "learning_rate": 1.8270792278934302e-06,
      "loss": 0.0447,
      "step": 460
    },
    {
      "epoch": 2.5268817204301075,
      "grad_norm": 0.2204674929380417,
      "learning_rate": 1.0472587670027678e-06,
      "loss": 0.0432,
      "step": 470
    },
    {
      "epoch": 2.5806451612903225,
      "grad_norm": 0.2851138114929199,
      "learning_rate": 4.814961881085045e-07,
      "loss": 0.049,
      "step": 480
    },
    {
      "epoch": 2.6344086021505375,
      "grad_norm": 0.22748719155788422,
      "learning_rate": 1.3226542701689215e-07,
      "loss": 0.0452,
      "step": 490
    },
    {
      "epoch": 2.688172043010753,
      "grad_norm": 0.29575690627098083,
      "learning_rate": 1.0935809887702154e-09,
      "loss": 0.0484,
      "step": 500
    },
    {
      "epoch": 2.688172043010753,
      "step": 500,
      "total_flos": 0.0,
      "train_loss": 0.09292892861366273,
      "train_runtime": 1122.3185,
      "train_samples_per_second": 42.769,
      "train_steps_per_second": 0.446
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 96,
  "trial_name": null,
  "trial_params": null
}