| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2984, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01675603217158177, |
| "grad_norm": 3.53125, |
| "learning_rate": 4.9178954423592495e-05, |
| "loss": 4.2472, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03351206434316354, |
| "grad_norm": 6.53125, |
| "learning_rate": 4.834115281501341e-05, |
| "loss": 4.0273, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05026809651474531, |
| "grad_norm": 3.4375, |
| "learning_rate": 4.750335120643432e-05, |
| "loss": 4.039, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06702412868632708, |
| "grad_norm": 2.25, |
| "learning_rate": 4.666554959785523e-05, |
| "loss": 3.9948, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08378016085790885, |
| "grad_norm": 3.34375, |
| "learning_rate": 4.582774798927614e-05, |
| "loss": 3.9269, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10053619302949061, |
| "grad_norm": 3.546875, |
| "learning_rate": 4.4989946380697054e-05, |
| "loss": 3.9419, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11729222520107238, |
| "grad_norm": 2.875, |
| "learning_rate": 4.4152144772117966e-05, |
| "loss": 3.915, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13404825737265416, |
| "grad_norm": 2.734375, |
| "learning_rate": 4.331434316353888e-05, |
| "loss": 3.9188, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15080428954423591, |
| "grad_norm": 6.9375, |
| "learning_rate": 4.247654155495979e-05, |
| "loss": 3.8512, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1675603217158177, |
| "grad_norm": 5.65625, |
| "learning_rate": 4.16387399463807e-05, |
| "loss": 3.9142, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18431635388739948, |
| "grad_norm": 2.75, |
| "learning_rate": 4.0800938337801606e-05, |
| "loss": 3.8583, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.20107238605898123, |
| "grad_norm": 3.03125, |
| "learning_rate": 3.9963136729222525e-05, |
| "loss": 3.8955, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.217828418230563, |
| "grad_norm": 7.21875, |
| "learning_rate": 3.912533512064344e-05, |
| "loss": 3.8571, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.23458445040214476, |
| "grad_norm": 2.59375, |
| "learning_rate": 3.828753351206434e-05, |
| "loss": 3.9075, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.25134048257372654, |
| "grad_norm": 3.28125, |
| "learning_rate": 3.744973190348526e-05, |
| "loss": 3.8933, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2680965147453083, |
| "grad_norm": 2.046875, |
| "learning_rate": 3.6611930294906165e-05, |
| "loss": 3.8752, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2848525469168901, |
| "grad_norm": 2.359375, |
| "learning_rate": 3.577412868632708e-05, |
| "loss": 3.8662, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.30160857908847183, |
| "grad_norm": 2.8125, |
| "learning_rate": 3.4936327077747996e-05, |
| "loss": 3.912, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3183646112600536, |
| "grad_norm": 1.953125, |
| "learning_rate": 3.40985254691689e-05, |
| "loss": 3.9365, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3351206434316354, |
| "grad_norm": 3.4375, |
| "learning_rate": 3.326072386058981e-05, |
| "loss": 3.8791, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.35187667560321717, |
| "grad_norm": 3.390625, |
| "learning_rate": 3.2422922252010724e-05, |
| "loss": 3.8536, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.36863270777479895, |
| "grad_norm": 3.1875, |
| "learning_rate": 3.1585120643431636e-05, |
| "loss": 3.8585, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3853887399463807, |
| "grad_norm": 3.40625, |
| "learning_rate": 3.074731903485255e-05, |
| "loss": 3.9163, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.40214477211796246, |
| "grad_norm": 2.34375, |
| "learning_rate": 2.990951742627346e-05, |
| "loss": 3.8529, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.41890080428954424, |
| "grad_norm": 3.140625, |
| "learning_rate": 2.907171581769437e-05, |
| "loss": 3.86, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.435656836461126, |
| "grad_norm": 2.40625, |
| "learning_rate": 2.823391420911528e-05, |
| "loss": 3.8541, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4524128686327078, |
| "grad_norm": 2.03125, |
| "learning_rate": 2.7396112600536195e-05, |
| "loss": 3.8549, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.4691689008042895, |
| "grad_norm": 2.671875, |
| "learning_rate": 2.6558310991957107e-05, |
| "loss": 3.884, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4859249329758713, |
| "grad_norm": 3.34375, |
| "learning_rate": 2.5720509383378015e-05, |
| "loss": 3.8677, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5026809651474531, |
| "grad_norm": 2.984375, |
| "learning_rate": 2.488270777479893e-05, |
| "loss": 3.7921, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5194369973190348, |
| "grad_norm": 7.0, |
| "learning_rate": 2.4044906166219842e-05, |
| "loss": 3.8765, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5361930294906166, |
| "grad_norm": 1.7421875, |
| "learning_rate": 2.320710455764075e-05, |
| "loss": 3.7936, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5529490616621984, |
| "grad_norm": 1.8984375, |
| "learning_rate": 2.2369302949061662e-05, |
| "loss": 3.8304, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5697050938337802, |
| "grad_norm": 2.421875, |
| "learning_rate": 2.1531501340482574e-05, |
| "loss": 3.7994, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5864611260053619, |
| "grad_norm": 3.234375, |
| "learning_rate": 2.069369973190349e-05, |
| "loss": 3.8363, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6032171581769437, |
| "grad_norm": 2.46875, |
| "learning_rate": 1.9855898123324398e-05, |
| "loss": 3.8377, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6199731903485255, |
| "grad_norm": 2.296875, |
| "learning_rate": 1.901809651474531e-05, |
| "loss": 3.86, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6367292225201072, |
| "grad_norm": 3.546875, |
| "learning_rate": 1.818029490616622e-05, |
| "loss": 3.8127, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.653485254691689, |
| "grad_norm": 2.578125, |
| "learning_rate": 1.7342493297587133e-05, |
| "loss": 3.8481, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6702412868632708, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.6504691689008045e-05, |
| "loss": 3.8308, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6869973190348525, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.5666890080428956e-05, |
| "loss": 3.7686, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7037533512064343, |
| "grad_norm": 2.109375, |
| "learning_rate": 1.4829088471849867e-05, |
| "loss": 3.8635, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7205093833780161, |
| "grad_norm": 3.8125, |
| "learning_rate": 1.3991286863270778e-05, |
| "loss": 3.8338, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7372654155495979, |
| "grad_norm": 2.640625, |
| "learning_rate": 1.3153485254691688e-05, |
| "loss": 3.8528, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7540214477211796, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.23156836461126e-05, |
| "loss": 3.879, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7707774798927614, |
| "grad_norm": 2.453125, |
| "learning_rate": 1.1477882037533512e-05, |
| "loss": 3.8653, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7875335120643432, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.0640080428954424e-05, |
| "loss": 3.7811, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.8042895442359249, |
| "grad_norm": 1.8125, |
| "learning_rate": 9.802278820375336e-06, |
| "loss": 3.864, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8210455764075067, |
| "grad_norm": 3.015625, |
| "learning_rate": 8.964477211796247e-06, |
| "loss": 3.8341, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.8378016085790885, |
| "grad_norm": 2.109375, |
| "learning_rate": 8.126675603217159e-06, |
| "loss": 3.8547, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8545576407506702, |
| "grad_norm": 3.765625, |
| "learning_rate": 7.288873994638071e-06, |
| "loss": 3.8611, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.871313672922252, |
| "grad_norm": 2.390625, |
| "learning_rate": 6.451072386058982e-06, |
| "loss": 3.7803, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8880697050938338, |
| "grad_norm": 2.609375, |
| "learning_rate": 5.613270777479894e-06, |
| "loss": 3.8047, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.9048257372654156, |
| "grad_norm": 2.09375, |
| "learning_rate": 4.7754691689008045e-06, |
| "loss": 3.8275, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9215817694369973, |
| "grad_norm": 3.59375, |
| "learning_rate": 3.9376675603217155e-06, |
| "loss": 3.8354, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.938337801608579, |
| "grad_norm": 2.90625, |
| "learning_rate": 3.0998659517426277e-06, |
| "loss": 3.8245, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9550938337801609, |
| "grad_norm": 2.375, |
| "learning_rate": 2.262064343163539e-06, |
| "loss": 3.8646, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.9718498659517426, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.4242627345844506e-06, |
| "loss": 3.8213, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9886058981233244, |
| "grad_norm": 7.625, |
| "learning_rate": 5.86461126005362e-07, |
| "loss": 3.7671, |
| "step": 2950 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 2984, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.509068212041933e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|