{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9972602739726026,
  "eval_steps": 500,
  "global_step": 546,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0365296803652968,
      "grad_norm": 4.9501127583168785,
      "learning_rate": 2.7272727272727272e-06,
      "loss": 1.0691,
      "step": 10
    },
    {
      "epoch": 0.0730593607305936,
      "grad_norm": 28.452162534287613,
      "learning_rate": 5.7575757575757586e-06,
      "loss": 0.8434,
      "step": 20
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 2.9662073954152746,
      "learning_rate": 8.787878787878788e-06,
      "loss": 0.8105,
      "step": 30
    },
    {
      "epoch": 0.1461187214611872,
      "grad_norm": 2.5352330284959965,
      "learning_rate": 9.943342776203968e-06,
      "loss": 0.761,
      "step": 40
    },
    {
      "epoch": 0.182648401826484,
      "grad_norm": 2.362958807035952,
      "learning_rate": 9.848914069877243e-06,
      "loss": 0.7277,
      "step": 50
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 2.294627738902447,
      "learning_rate": 9.75448536355052e-06,
      "loss": 0.7132,
      "step": 60
    },
    {
      "epoch": 0.2557077625570776,
      "grad_norm": 2.4090750376560828,
      "learning_rate": 9.660056657223796e-06,
      "loss": 0.7551,
      "step": 70
    },
    {
      "epoch": 0.2922374429223744,
      "grad_norm": 2.205057459617227,
      "learning_rate": 9.565627950897073e-06,
      "loss": 0.7556,
      "step": 80
    },
    {
      "epoch": 0.3287671232876712,
      "grad_norm": 2.2866915254644438,
      "learning_rate": 9.47119924457035e-06,
      "loss": 0.7181,
      "step": 90
    },
    {
      "epoch": 0.365296803652968,
      "grad_norm": 2.17656961412137,
      "learning_rate": 9.376770538243626e-06,
      "loss": 0.7194,
      "step": 100
    },
    {
      "epoch": 0.4018264840182648,
      "grad_norm": 2.140996944291605,
      "learning_rate": 9.282341831916904e-06,
      "loss": 0.7684,
      "step": 110
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 2.1884414288152874,
      "learning_rate": 9.18791312559018e-06,
      "loss": 0.7256,
      "step": 120
    },
    {
      "epoch": 0.4748858447488584,
      "grad_norm": 2.3620447540216376,
      "learning_rate": 9.093484419263457e-06,
      "loss": 0.7258,
      "step": 130
    },
    {
      "epoch": 0.5114155251141552,
      "grad_norm": 2.176892029979272,
      "learning_rate": 8.999055712936734e-06,
      "loss": 0.7105,
      "step": 140
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 2.2934104921642056,
      "learning_rate": 8.904627006610011e-06,
      "loss": 0.6892,
      "step": 150
    },
    {
      "epoch": 0.5844748858447488,
      "grad_norm": 2.161788987733457,
      "learning_rate": 8.810198300283287e-06,
      "loss": 0.7044,
      "step": 160
    },
    {
      "epoch": 0.6210045662100456,
      "grad_norm": 2.131707500176503,
      "learning_rate": 8.715769593956564e-06,
      "loss": 0.7708,
      "step": 170
    },
    {
      "epoch": 0.6575342465753424,
      "grad_norm": 2.1048318064905174,
      "learning_rate": 8.62134088762984e-06,
      "loss": 0.6701,
      "step": 180
    },
    {
      "epoch": 0.6940639269406392,
      "grad_norm": 2.0128804847924533,
      "learning_rate": 8.526912181303117e-06,
      "loss": 0.7422,
      "step": 190
    },
    {
      "epoch": 0.730593607305936,
      "grad_norm": 2.259736010184029,
      "learning_rate": 8.432483474976394e-06,
      "loss": 0.6941,
      "step": 200
    },
    {
      "epoch": 0.7671232876712328,
      "grad_norm": 2.080398916483824,
      "learning_rate": 8.33805476864967e-06,
      "loss": 0.735,
      "step": 210
    },
    {
      "epoch": 0.8036529680365296,
      "grad_norm": 2.07699507931849,
      "learning_rate": 8.243626062322947e-06,
      "loss": 0.6863,
      "step": 220
    },
    {
      "epoch": 0.8401826484018264,
      "grad_norm": 2.1694371746152368,
      "learning_rate": 8.149197355996223e-06,
      "loss": 0.6996,
      "step": 230
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 1.9617743120029298,
      "learning_rate": 8.0547686496695e-06,
      "loss": 0.7033,
      "step": 240
    },
    {
      "epoch": 0.91324200913242,
      "grad_norm": 2.0825941268129298,
      "learning_rate": 7.960339943342776e-06,
      "loss": 0.7064,
      "step": 250
    },
    {
      "epoch": 0.9497716894977168,
      "grad_norm": 1.8965962444263702,
      "learning_rate": 7.865911237016053e-06,
      "loss": 0.6809,
      "step": 260
    },
    {
      "epoch": 0.9863013698630136,
      "grad_norm": 1.9874641822588675,
      "learning_rate": 7.77148253068933e-06,
      "loss": 0.7047,
      "step": 270
    },
    {
      "epoch": 1.0255707762557078,
      "grad_norm": 2.004027409704685,
      "learning_rate": 7.677053824362606e-06,
      "loss": 0.6212,
      "step": 280
    },
    {
      "epoch": 1.0621004566210046,
      "grad_norm": 1.8176126243818052,
      "learning_rate": 7.582625118035884e-06,
      "loss": 0.5171,
      "step": 290
    },
    {
      "epoch": 1.0986301369863014,
      "grad_norm": 2.148968213788119,
      "learning_rate": 7.48819641170916e-06,
      "loss": 0.5385,
      "step": 300
    },
    {
      "epoch": 1.1351598173515982,
      "grad_norm": 2.021362745716521,
      "learning_rate": 7.3937677053824365e-06,
      "loss": 0.5367,
      "step": 310
    },
    {
      "epoch": 1.171689497716895,
      "grad_norm": 2.060348666344981,
      "learning_rate": 7.299338999055714e-06,
      "loss": 0.5209,
      "step": 320
    },
    {
      "epoch": 1.2082191780821918,
      "grad_norm": 1.80075041228175,
      "learning_rate": 7.20491029272899e-06,
      "loss": 0.4987,
      "step": 330
    },
    {
      "epoch": 1.2447488584474886,
      "grad_norm": 2.0270293853719865,
      "learning_rate": 7.110481586402267e-06,
      "loss": 0.5365,
      "step": 340
    },
    {
      "epoch": 1.2812785388127854,
      "grad_norm": 2.3514775380809048,
      "learning_rate": 7.016052880075543e-06,
      "loss": 0.5197,
      "step": 350
    },
    {
      "epoch": 1.3178082191780822,
      "grad_norm": 1.8590134591378304,
      "learning_rate": 6.92162417374882e-06,
      "loss": 0.5155,
      "step": 360
    },
    {
      "epoch": 1.354337899543379,
      "grad_norm": 1.9026553689519272,
      "learning_rate": 6.827195467422096e-06,
      "loss": 0.5174,
      "step": 370
    },
    {
      "epoch": 1.3908675799086758,
      "grad_norm": 1.8300116300902427,
      "learning_rate": 6.732766761095374e-06,
      "loss": 0.5231,
      "step": 380
    },
    {
      "epoch": 1.4273972602739726,
      "grad_norm": 1.9157955944054372,
      "learning_rate": 6.638338054768651e-06,
      "loss": 0.5327,
      "step": 390
    },
    {
      "epoch": 1.4639269406392694,
      "grad_norm": 1.8429454712112896,
      "learning_rate": 6.543909348441927e-06,
      "loss": 0.527,
      "step": 400
    },
    {
      "epoch": 1.5004566210045662,
      "grad_norm": 1.9805863537564896,
      "learning_rate": 6.449480642115204e-06,
      "loss": 0.5279,
      "step": 410
    },
    {
      "epoch": 1.536986301369863,
      "grad_norm": 1.7075809063269636,
      "learning_rate": 6.35505193578848e-06,
      "loss": 0.5495,
      "step": 420
    },
    {
      "epoch": 1.5735159817351598,
      "grad_norm": 2.0725088975854513,
      "learning_rate": 6.260623229461757e-06,
      "loss": 0.529,
      "step": 430
    },
    {
      "epoch": 1.6100456621004566,
      "grad_norm": 1.9393642829879987,
      "learning_rate": 6.166194523135034e-06,
      "loss": 0.5223,
      "step": 440
    },
    {
      "epoch": 1.6465753424657534,
      "grad_norm": 2.023782412529122,
      "learning_rate": 6.0717658168083105e-06,
      "loss": 0.4945,
      "step": 450
    },
    {
      "epoch": 1.6831050228310502,
      "grad_norm": 2.1901747912652136,
      "learning_rate": 5.977337110481587e-06,
      "loss": 0.5144,
      "step": 460
    },
    {
      "epoch": 1.719634703196347,
      "grad_norm": 1.7318839753018282,
      "learning_rate": 5.8829084041548635e-06,
      "loss": 0.5149,
      "step": 470
    },
    {
      "epoch": 1.7561643835616438,
      "grad_norm": 2.001666586765145,
      "learning_rate": 5.78847969782814e-06,
      "loss": 0.5516,
      "step": 480
    },
    {
      "epoch": 1.7926940639269406,
      "grad_norm": 1.9348719585521998,
      "learning_rate": 5.6940509915014164e-06,
      "loss": 0.5237,
      "step": 490
    },
    {
      "epoch": 1.8292237442922374,
      "grad_norm": 1.7976901694713592,
      "learning_rate": 5.599622285174694e-06,
      "loss": 0.5262,
      "step": 500
    },
    {
      "epoch": 1.8657534246575342,
      "grad_norm": 1.8133539707609403,
      "learning_rate": 5.50519357884797e-06,
      "loss": 0.5192,
      "step": 510
    },
    {
      "epoch": 1.902283105022831,
      "grad_norm": 2.024974159795074,
      "learning_rate": 5.410764872521247e-06,
      "loss": 0.547,
      "step": 520
    },
    {
      "epoch": 1.9388127853881278,
      "grad_norm": 1.7187815700467375,
      "learning_rate": 5.316336166194523e-06,
      "loss": 0.5299,
      "step": 530
    },
    {
      "epoch": 1.9753424657534246,
      "grad_norm": 1.9519525099517923,
      "learning_rate": 5.2219074598678e-06,
      "loss": 0.5219,
      "step": 540
    }
  ],
  "logging_steps": 10,
  "max_steps": 1092,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 86671028453376.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}