{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 269,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0186219739292365,
      "grad_norm": 1.397485375404358,
      "learning_rate": 1.7647058823529412e-06,
      "loss": 1.3045,
      "step": 5
    },
    {
      "epoch": 0.037243947858473,
      "grad_norm": 0.9164593815803528,
      "learning_rate": 3.970588235294118e-06,
      "loss": 1.3238,
      "step": 10
    },
    {
      "epoch": 0.055865921787709494,
      "grad_norm": 0.7285172343254089,
      "learning_rate": 6.176470588235294e-06,
      "loss": 1.3402,
      "step": 15
    },
    {
      "epoch": 0.074487895716946,
      "grad_norm": 0.7771405577659607,
      "learning_rate": 8.382352941176472e-06,
      "loss": 1.2444,
      "step": 20
    },
    {
      "epoch": 0.0931098696461825,
      "grad_norm": 0.590904951095581,
      "learning_rate": 1.0588235294117648e-05,
      "loss": 1.2908,
      "step": 25
    },
    {
      "epoch": 0.11173184357541899,
      "grad_norm": 0.68792724609375,
      "learning_rate": 1.2794117647058824e-05,
      "loss": 1.2953,
      "step": 30
    },
    {
      "epoch": 0.1303538175046555,
      "grad_norm": 0.6625350117683411,
      "learning_rate": 1.5e-05,
      "loss": 1.192,
      "step": 35
    },
    {
      "epoch": 0.148975791433892,
      "grad_norm": 0.5564359426498413,
      "learning_rate": 1.7205882352941175e-05,
      "loss": 1.1406,
      "step": 40
    },
    {
      "epoch": 0.16759776536312848,
      "grad_norm": 0.4591367542743683,
      "learning_rate": 1.9411764705882355e-05,
      "loss": 1.181,
      "step": 45
    },
    {
      "epoch": 0.186219739292365,
      "grad_norm": 0.5679731369018555,
      "learning_rate": 2.161764705882353e-05,
      "loss": 1.1935,
      "step": 50
    },
    {
      "epoch": 0.2048417132216015,
      "grad_norm": 0.4359203577041626,
      "learning_rate": 2.3823529411764704e-05,
      "loss": 1.1527,
      "step": 55
    },
    {
      "epoch": 0.22346368715083798,
      "grad_norm": 0.49340277910232544,
      "learning_rate": 2.6029411764705883e-05,
      "loss": 1.1635,
      "step": 60
    },
    {
      "epoch": 0.24208566108007448,
      "grad_norm": 0.4740353226661682,
      "learning_rate": 2.823529411764706e-05,
      "loss": 1.1883,
      "step": 65
    },
    {
      "epoch": 0.260707635009311,
      "grad_norm": 0.6028391122817993,
      "learning_rate": 2.9999954608033783e-05,
      "loss": 1.1328,
      "step": 70
    },
    {
      "epoch": 0.27932960893854747,
      "grad_norm": 0.5162932872772217,
      "learning_rate": 2.9998365918062082e-05,
      "loss": 1.0934,
      "step": 75
    },
    {
      "epoch": 0.297951582867784,
      "grad_norm": 0.5382624864578247,
      "learning_rate": 2.9994507904496206e-05,
      "loss": 1.1019,
      "step": 80
    },
    {
      "epoch": 0.3165735567970205,
      "grad_norm": 0.5565810799598694,
      "learning_rate": 2.998838115107183e-05,
      "loss": 1.0906,
      "step": 85
    },
    {
      "epoch": 0.33519553072625696,
      "grad_norm": 0.5212501287460327,
      "learning_rate": 2.997998658479568e-05,
      "loss": 1.0724,
      "step": 90
    },
    {
      "epoch": 0.3538175046554935,
      "grad_norm": 0.5489919185638428,
      "learning_rate": 2.9969325475805274e-05,
      "loss": 1.0658,
      "step": 95
    },
    {
      "epoch": 0.37243947858473,
      "grad_norm": 0.6241554021835327,
      "learning_rate": 2.995639943717676e-05,
      "loss": 1.059,
      "step": 100
    },
    {
      "epoch": 0.39106145251396646,
      "grad_norm": 0.563462495803833,
      "learning_rate": 2.9941210424680813e-05,
      "loss": 1.057,
      "step": 105
    },
    {
      "epoch": 0.409683426443203,
      "grad_norm": 0.5747496485710144,
      "learning_rate": 2.9923760736486766e-05,
      "loss": 1.0564,
      "step": 110
    },
    {
      "epoch": 0.42830540037243947,
      "grad_norm": 0.5738973021507263,
      "learning_rate": 2.9904053012814848e-05,
      "loss": 1.0316,
      "step": 115
    },
    {
      "epoch": 0.44692737430167595,
      "grad_norm": 0.6159545183181763,
      "learning_rate": 2.988209023553672e-05,
      "loss": 0.995,
      "step": 120
    },
    {
      "epoch": 0.4655493482309125,
      "grad_norm": 0.6447931528091431,
      "learning_rate": 2.9857875727724304e-05,
      "loss": 0.9692,
      "step": 125
    },
    {
      "epoch": 0.48417132216014896,
      "grad_norm": 0.7060043215751648,
      "learning_rate": 2.9831413153146988e-05,
      "loss": 0.9569,
      "step": 130
    },
    {
      "epoch": 0.5027932960893855,
      "grad_norm": 0.7400388717651367,
      "learning_rate": 2.9802706515717272e-05,
      "loss": 0.9378,
      "step": 135
    },
    {
      "epoch": 0.521415270018622,
      "grad_norm": 0.7127872705459595,
      "learning_rate": 2.9771760158884972e-05,
      "loss": 0.8923,
      "step": 140
    },
    {
      "epoch": 0.5400372439478585,
      "grad_norm": 0.8123269081115723,
      "learning_rate": 2.9738578764980025e-05,
      "loss": 0.9285,
      "step": 145
    },
    {
      "epoch": 0.5586592178770949,
      "grad_norm": 0.8044330477714539,
      "learning_rate": 2.9703167354504027e-05,
      "loss": 0.885,
      "step": 150
    },
    {
      "epoch": 0.5772811918063314,
      "grad_norm": 0.7073454856872559,
      "learning_rate": 2.966553128537062e-05,
      "loss": 0.9022,
      "step": 155
    },
    {
      "epoch": 0.595903165735568,
      "grad_norm": 0.725287914276123,
      "learning_rate": 2.9625676252094797e-05,
      "loss": 0.8863,
      "step": 160
    },
    {
      "epoch": 0.6145251396648045,
      "grad_norm": 0.843565046787262,
      "learning_rate": 2.9583608284931317e-05,
      "loss": 0.928,
      "step": 165
    },
    {
      "epoch": 0.633147113594041,
      "grad_norm": 0.8186341524124146,
      "learning_rate": 2.953933374896227e-05,
      "loss": 0.8967,
      "step": 170
    },
    {
      "epoch": 0.6517690875232774,
      "grad_norm": 0.8052655458450317,
      "learning_rate": 2.949285934313405e-05,
      "loss": 0.8766,
      "step": 175
    },
    {
      "epoch": 0.6703910614525139,
      "grad_norm": 1.142082691192627,
      "learning_rate": 2.9444192099243733e-05,
      "loss": 0.8402,
      "step": 180
    },
    {
      "epoch": 0.6890130353817505,
      "grad_norm": 0.9169373512268066,
      "learning_rate": 2.939333938087515e-05,
      "loss": 0.8426,
      "step": 185
    },
    {
      "epoch": 0.707635009310987,
      "grad_norm": 0.8522142171859741,
      "learning_rate": 2.9340308882284747e-05,
      "loss": 0.8288,
      "step": 190
    },
    {
      "epoch": 0.7262569832402235,
      "grad_norm": 0.8226320147514343,
      "learning_rate": 2.92851086272374e-05,
      "loss": 0.8252,
      "step": 195
    },
    {
      "epoch": 0.74487895716946,
      "grad_norm": 0.840149462223053,
      "learning_rate": 2.9227746967792392e-05,
      "loss": 0.7888,
      "step": 200
    },
    {
      "epoch": 0.7635009310986964,
      "grad_norm": 1.0344595909118652,
      "learning_rate": 2.916823258303968e-05,
      "loss": 0.7889,
      "step": 205
    },
    {
      "epoch": 0.7821229050279329,
      "grad_norm": 0.9850447177886963,
      "learning_rate": 2.9106574477786748e-05,
      "loss": 0.7634,
      "step": 210
    },
    {
      "epoch": 0.8007448789571695,
      "grad_norm": 0.9077547192573547,
      "learning_rate": 2.9042781981196095e-05,
      "loss": 0.7372,
      "step": 215
    },
    {
      "epoch": 0.819366852886406,
      "grad_norm": 0.8616479635238647,
      "learning_rate": 2.897686474537373e-05,
      "loss": 0.7238,
      "step": 220
    },
    {
      "epoch": 0.8379888268156425,
      "grad_norm": 0.9395949244499207,
      "learning_rate": 2.890883274390872e-05,
      "loss": 0.6952,
      "step": 225
    },
    {
      "epoch": 0.8566108007448789,
      "grad_norm": 0.9580796957015991,
      "learning_rate": 2.8838696270364183e-05,
      "loss": 0.6983,
      "step": 230
    },
    {
      "epoch": 0.8752327746741154,
      "grad_norm": 0.8633882403373718,
      "learning_rate": 2.8766465936719785e-05,
      "loss": 0.7479,
      "step": 235
    },
    {
      "epoch": 0.8938547486033519,
      "grad_norm": 1.0677725076675415,
      "learning_rate": 2.869215267176612e-05,
      "loss": 0.7132,
      "step": 240
    },
    {
      "epoch": 0.9124767225325885,
      "grad_norm": 1.0318955183029175,
      "learning_rate": 2.8615767719451125e-05,
      "loss": 0.6744,
      "step": 245
    },
    {
      "epoch": 0.931098696461825,
      "grad_norm": 0.8933286070823669,
      "learning_rate": 2.8537322637178816e-05,
      "loss": 0.705,
      "step": 250
    },
    {
      "epoch": 0.9497206703910615,
      "grad_norm": 0.8829292058944702,
      "learning_rate": 2.8456829294060608e-05,
      "loss": 0.7091,
      "step": 255
    },
    {
      "epoch": 0.9683426443202979,
      "grad_norm": 1.1101434230804443,
      "learning_rate": 2.837429986911944e-05,
      "loss": 0.6532,
      "step": 260
    },
    {
      "epoch": 0.9869646182495344,
      "grad_norm": 1.070557713508606,
      "learning_rate": 2.828974684944707e-05,
      "loss": 0.6493,
      "step": 265
    }
  ],
  "logging_steps": 5,
  "max_steps": 1345,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.85525873404543e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}