{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 200.0, "global_step": 354, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005649717514124294, "grad_norm": 1.8059313297271729, "learning_rate": 4.999901553476555e-05, "loss": 0.4717830717563629, "step": 1, "token_acc": 0.924627166465135 }, { "epoch": 0.05649717514124294, "grad_norm": 0.1542048305273056, "learning_rate": 4.9901617425775067e-05, "loss": 0.33008625772264266, "step": 10, "token_acc": 0.9219071687140808 }, { "epoch": 0.11299435028248588, "grad_norm": 0.11267846822738647, "learning_rate": 4.9607244033573156e-05, "loss": 0.25351030826568605, "step": 20, "token_acc": 0.9264000170247177 }, { "epoch": 0.1694915254237288, "grad_norm": 0.09091860800981522, "learning_rate": 4.91191967203629e-05, "loss": 0.22513890266418457, "step": 30, "token_acc": 0.9317765577689588 }, { "epoch": 0.22598870056497175, "grad_norm": 0.08639991283416748, "learning_rate": 4.84413167142257e-05, "loss": 0.20885028839111328, "step": 40, "token_acc": 0.9359174715592828 }, { "epoch": 0.2824858757062147, "grad_norm": 0.09375399351119995, "learning_rate": 4.7578939341563095e-05, "loss": 0.19784480333328247, "step": 50, "token_acc": 0.9384661788621316 }, { "epoch": 0.3389830508474576, "grad_norm": 0.08870115131139755, "learning_rate": 4.653885203484515e-05, "loss": 0.18448562622070314, "step": 60, "token_acc": 0.9420314698252165 }, { "epoch": 0.3954802259887006, "grad_norm": 0.11067840456962585, "learning_rate": 4.532924091140417e-05, "loss": 0.18294379711151124, "step": 70, "token_acc": 0.9418481147105683 }, { "epoch": 0.4519774011299435, "grad_norm": 0.09862152487039566, "learning_rate": 4.395962634373097e-05, "loss": 0.17243103981018065, "step": 80, "token_acc": 0.9446352200693965 }, { "epoch": 0.5084745762711864, "grad_norm": 0.10678666085004807, "learning_rate": 4.2440788028374624e-05, "loss": 0.1731430172920227, "step": 90, "token_acc": 0.9447363875815018 }, { "epoch": 0.5649717514124294, "grad_norm": 0.10532315075397491, "learning_rate": 4.0784680143198836e-05, "loss": 0.17281131744384765, "step": 100, "token_acc": 0.9447437022704439 }, { "epoch": 0.6214689265536724, "grad_norm": 0.11211931705474854, "learning_rate": 3.900433726075865e-05, "loss": 0.16182489395141603, "step": 110, "token_acc": 0.9481675818843257 }, { "epoch": 0.6779661016949152, "grad_norm": 0.10726941376924515, "learning_rate": 3.711377175831626e-05, "loss": 0.16202739477157593, "step": 120, "token_acc": 0.9479687034245692 }, { "epoch": 0.7344632768361582, "grad_norm": 0.10400757193565369, "learning_rate": 3.512786353194134e-05, "loss": 0.15924739837646484, "step": 130, "token_acc": 0.9486699455285843 }, { "epoch": 0.7909604519774012, "grad_norm": 0.10787644982337952, "learning_rate": 3.3062242882712724e-05, "loss": 0.15439069271087646, "step": 140, "token_acc": 0.9498849158473873 }, { "epoch": 0.847457627118644, "grad_norm": 0.10836105048656464, "learning_rate": 3.093316749677788e-05, "loss": 0.15822217464447022, "step": 150, "token_acc": 0.9487123526844173 }, { "epoch": 0.903954802259887, "grad_norm": 0.10872391611337662, "learning_rate": 2.875739448751176e-05, "loss": 0.15569958686828614, "step": 160, "token_acc": 0.9491473105803542 }, { "epoch": 0.96045197740113, "grad_norm": 0.11218578368425369, "learning_rate": 2.655204850688085e-05, "loss": 0.15528473854064942, "step": 170, "token_acc": 0.9494661997922623 }, { "epoch": 1.0169491525423728, "grad_norm": 0.13003411889076233, "learning_rate": 2.433448696405563e-05, "loss": 0.1492830991744995, "step": 180, "token_acc": 0.9511860316683133 }, { "epoch": 1.073446327683616, "grad_norm": 0.12652547657489777, "learning_rate": 2.2122163412082927e-05, "loss": 0.14996984004974365, "step": 190, "token_acc": 0.9509753894028877 }, { "epoch": 1.1299435028248588, "grad_norm": 0.11710216104984283, "learning_rate": 1.993249017784766e-05, "loss": 0.149368953704834, "step": 200, "token_acc": 0.9509840746795515 }, { "epoch": 1.1864406779661016, "grad_norm": 0.12813611328601837, "learning_rate": 1.778270131650948e-05, "loss": 0.1482247829437256, "step": 210, "token_acc": 0.9514638991717056 }, { "epoch": 1.2429378531073447, "grad_norm": 0.12335359305143356, "learning_rate": 1.5689716969045848e-05, "loss": 0.14458421468734742, "step": 220, "token_acc": 0.9528204997080846 }, { "epoch": 1.2994350282485876, "grad_norm": 0.1252630650997162, "learning_rate": 1.3670010190490073e-05, "loss": 0.14932241439819335, "step": 230, "token_acc": 0.950983923940499 }, { "epoch": 1.3559322033898304, "grad_norm": 0.12602832913398743, "learning_rate": 1.173947729700644e-05, "loss": 0.14413282871246338, "step": 240, "token_acc": 0.9524894618411184 }, { "epoch": 1.4124293785310735, "grad_norm": 0.1404254138469696, "learning_rate": 9.913312752249903e-06, "loss": 0.14448442459106445, "step": 250, "token_acc": 0.9523784878342272 }, { "epoch": 1.4689265536723164, "grad_norm": 0.1297323852777481, "learning_rate": 8.20588957773018e-06, "loss": 0.1442911744117737, "step": 260, "token_acc": 0.9526485262065045 }, { "epoch": 1.5254237288135593, "grad_norm": 0.1164567619562149, "learning_rate": 6.6306462284233234e-06, "loss": 0.1476944088935852, "step": 270, "token_acc": 0.9516979818914234 }, { "epoch": 1.5819209039548023, "grad_norm": 0.12876106798648834, "learning_rate": 5.199980823988157e-06, "loss": 0.14429720640182495, "step": 280, "token_acc": 0.9527345847326476 }, { "epoch": 1.6384180790960452, "grad_norm": 0.12522290647029877, "learning_rate": 3.925153568052123e-06, "loss": 0.14247846603393555, "step": 290, "token_acc": 0.952577761791889 }, { "epoch": 1.694915254237288, "grad_norm": 0.1215895265340805, "learning_rate": 2.8161981235857143e-06, "loss": 0.14371044635772706, "step": 300, "token_acc": 0.9530261029770724 }, { "epoch": 1.7514124293785311, "grad_norm": 0.13429652154445648, "learning_rate": 1.881842641895104e-06, "loss": 0.1435616970062256, "step": 310, "token_acc": 0.9526384206465796 }, { "epoch": 1.807909604519774, "grad_norm": 0.12633894383907318, "learning_rate": 1.129441066782702e-06, "loss": 0.14912809133529664, "step": 320, "token_acc": 0.9509201261393581 }, { "epoch": 1.8644067796610169, "grad_norm": 0.12346093356609344, "learning_rate": 5.649152545533332e-07, "loss": 0.14490561485290526, "step": 330, "token_acc": 0.9519967728922952 }, { "epoch": 1.92090395480226, "grad_norm": 0.123548224568367, "learning_rate": 1.927083654168854e-07, "loss": 0.14034559726715087, "step": 340, "token_acc": 0.9537333066731213 }, { "epoch": 1.9774011299435028, "grad_norm": 0.1290796846151352, "learning_rate": 1.5749893125160954e-08, "loss": 0.14156577587127686, "step": 350, "token_acc": 0.9530219643471618 } ], "logging_steps": 10, "max_steps": 354, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.089589125686231e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }