| { |
| "best_global_step": 3146, |
| "best_metric": 0.4961947202682495, |
| "best_model_checkpoint": "./results/checkpoint-3146", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 4719, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06357279084551812, |
| "grad_norm": 9.905113220214844, |
| "learning_rate": 1.958041958041958e-05, |
| "loss": 0.8792, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12714558169103624, |
| "grad_norm": 34.34392166137695, |
| "learning_rate": 1.9156600974782794e-05, |
| "loss": 0.7359, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.19071837253655435, |
| "grad_norm": 10.16415023803711, |
| "learning_rate": 1.8732782369146007e-05, |
| "loss": 0.6854, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.25429116338207247, |
| "grad_norm": 16.56046485900879, |
| "learning_rate": 1.830896376350922e-05, |
| "loss": 0.6087, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3178639542275906, |
| "grad_norm": 4.33865213394165, |
| "learning_rate": 1.7885145157872432e-05, |
| "loss": 0.5703, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3814367450731087, |
| "grad_norm": 3.42850661277771, |
| "learning_rate": 1.7461326552235645e-05, |
| "loss": 0.5789, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.44500953591862685, |
| "grad_norm": 10.796558380126953, |
| "learning_rate": 1.7037507946598858e-05, |
| "loss": 0.5224, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5085823267641449, |
| "grad_norm": 5.681849479675293, |
| "learning_rate": 1.661368934096207e-05, |
| "loss": 0.5429, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5721551176096631, |
| "grad_norm": 10.79218864440918, |
| "learning_rate": 1.618987073532528e-05, |
| "loss": 0.5732, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6357279084551812, |
| "grad_norm": 4.346675395965576, |
| "learning_rate": 1.5766052129688493e-05, |
| "loss": 0.5606, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6993006993006993, |
| "grad_norm": 8.24238395690918, |
| "learning_rate": 1.5342233524051706e-05, |
| "loss": 0.52, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7628734901462174, |
| "grad_norm": 6.931696891784668, |
| "learning_rate": 1.491841491841492e-05, |
| "loss": 0.5521, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8264462809917356, |
| "grad_norm": 6.141210079193115, |
| "learning_rate": 1.4494596312778133e-05, |
| "loss": 0.5475, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8900190718372537, |
| "grad_norm": 12.69131088256836, |
| "learning_rate": 1.4070777707141346e-05, |
| "loss": 0.5375, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9535918626827717, |
| "grad_norm": 3.2147998809814453, |
| "learning_rate": 1.3646959101504557e-05, |
| "loss": 0.5187, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.5000280737876892, |
| "eval_runtime": 17.0424, |
| "eval_samples_per_second": 369.197, |
| "eval_steps_per_second": 11.559, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.0171646535282899, |
| "grad_norm": 7.1118574142456055, |
| "learning_rate": 1.322314049586777e-05, |
| "loss": 0.5293, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.080737444373808, |
| "grad_norm": 12.308618545532227, |
| "learning_rate": 1.2799321890230982e-05, |
| "loss": 0.4733, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.1443102352193262, |
| "grad_norm": 7.184673309326172, |
| "learning_rate": 1.2375503284594195e-05, |
| "loss": 0.4817, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.2078830260648443, |
| "grad_norm": 12.598664283752441, |
| "learning_rate": 1.1951684678957406e-05, |
| "loss": 0.4823, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.2714558169103625, |
| "grad_norm": 9.51124382019043, |
| "learning_rate": 1.152786607332062e-05, |
| "loss": 0.5099, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.3350286077558806, |
| "grad_norm": 11.298410415649414, |
| "learning_rate": 1.1104047467683832e-05, |
| "loss": 0.4713, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.3986013986013985, |
| "grad_norm": 5.375974655151367, |
| "learning_rate": 1.0680228862047045e-05, |
| "loss": 0.4731, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.4621741894469167, |
| "grad_norm": 6.566510200500488, |
| "learning_rate": 1.0256410256410256e-05, |
| "loss": 0.4834, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.5257469802924348, |
| "grad_norm": 24.000308990478516, |
| "learning_rate": 9.83259165077347e-06, |
| "loss": 0.4692, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.589319771137953, |
| "grad_norm": 6.890144348144531, |
| "learning_rate": 9.408773045136681e-06, |
| "loss": 0.4692, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.6528925619834711, |
| "grad_norm": 4.995481491088867, |
| "learning_rate": 8.984954439499894e-06, |
| "loss": 0.4613, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.716465352828989, |
| "grad_norm": 5.291828155517578, |
| "learning_rate": 8.561135833863107e-06, |
| "loss": 0.5056, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.7800381436745072, |
| "grad_norm": 9.579062461853027, |
| "learning_rate": 8.13731722822632e-06, |
| "loss": 0.48, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.8436109345200253, |
| "grad_norm": 6.972827434539795, |
| "learning_rate": 7.713498622589533e-06, |
| "loss": 0.4831, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.9071837253655435, |
| "grad_norm": 6.582764148712158, |
| "learning_rate": 7.2896800169527446e-06, |
| "loss": 0.4987, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.9707565162110616, |
| "grad_norm": 4.8419413566589355, |
| "learning_rate": 6.865861411315957e-06, |
| "loss": 0.4875, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.4961947202682495, |
| "eval_runtime": 17.0937, |
| "eval_samples_per_second": 368.089, |
| "eval_steps_per_second": 11.525, |
| "step": 3146 |
| }, |
| { |
| "epoch": 2.0343293070565798, |
| "grad_norm": 4.859919548034668, |
| "learning_rate": 6.442042805679169e-06, |
| "loss": 0.4774, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.097902097902098, |
| "grad_norm": 10.904428482055664, |
| "learning_rate": 6.018224200042382e-06, |
| "loss": 0.4427, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.161474888747616, |
| "grad_norm": 9.313108444213867, |
| "learning_rate": 5.594405594405595e-06, |
| "loss": 0.4337, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.225047679593134, |
| "grad_norm": 21.283798217773438, |
| "learning_rate": 5.170586988768808e-06, |
| "loss": 0.4418, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.2886204704386524, |
| "grad_norm": 6.696113586425781, |
| "learning_rate": 4.74676838313202e-06, |
| "loss": 0.4404, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.3521932612841705, |
| "grad_norm": 5.288127899169922, |
| "learning_rate": 4.322949777495232e-06, |
| "loss": 0.4443, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.4157660521296886, |
| "grad_norm": 9.664108276367188, |
| "learning_rate": 3.899131171858445e-06, |
| "loss": 0.4379, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.479338842975207, |
| "grad_norm": 9.518948554992676, |
| "learning_rate": 3.4753125662216576e-06, |
| "loss": 0.4039, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.542911633820725, |
| "grad_norm": 3.7866299152374268, |
| "learning_rate": 3.05149396058487e-06, |
| "loss": 0.4358, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.6064844246662426, |
| "grad_norm": 9.404533386230469, |
| "learning_rate": 2.6276753549480827e-06, |
| "loss": 0.4298, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.6700572155117612, |
| "grad_norm": 6.5651021003723145, |
| "learning_rate": 2.203856749311295e-06, |
| "loss": 0.436, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.733630006357279, |
| "grad_norm": 14.693212509155273, |
| "learning_rate": 1.7800381436745072e-06, |
| "loss": 0.4605, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.797202797202797, |
| "grad_norm": 8.170985221862793, |
| "learning_rate": 1.35621953803772e-06, |
| "loss": 0.4334, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.860775588048315, |
| "grad_norm": 9.423282623291016, |
| "learning_rate": 9.324009324009325e-07, |
| "loss": 0.4228, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.9243483788938334, |
| "grad_norm": 8.17439079284668, |
| "learning_rate": 5.08582326764145e-07, |
| "loss": 0.4354, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.9879211697393515, |
| "grad_norm": 4.317421913146973, |
| "learning_rate": 8.47637211273575e-08, |
| "loss": 0.3835, |
| "step": 4700 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.5011389255523682, |
| "eval_runtime": 17.0986, |
| "eval_samples_per_second": 367.984, |
| "eval_steps_per_second": 11.521, |
| "step": 4719 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 4719, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3453289607031264.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|