{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9986996098829649, "eval_steps": 500, "global_step": 384, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002600780234070221, "grad_norm": 0.02962934412062168, "learning_rate": 1.282051282051282e-06, "loss": 0.619, "step": 1 }, { "epoch": 0.02600780234070221, "grad_norm": 0.06379027664661407, "learning_rate": 1.282051282051282e-05, "loss": 0.6962, "step": 10 }, { "epoch": 0.05201560468140442, "grad_norm": 0.0363883376121521, "learning_rate": 2.564102564102564e-05, "loss": 0.7759, "step": 20 }, { "epoch": 0.07802340702210664, "grad_norm": 0.03419478237628937, "learning_rate": 3.846153846153846e-05, "loss": 0.8087, "step": 30 }, { "epoch": 0.10403120936280884, "grad_norm": 0.04424262419342995, "learning_rate": 4.985507246376812e-05, "loss": 0.7775, "step": 40 }, { "epoch": 0.13003901170351106, "grad_norm": 0.22272075712680817, "learning_rate": 4.840579710144928e-05, "loss": 0.7476, "step": 50 }, { "epoch": 0.15604681404421328, "grad_norm": 0.049193304032087326, "learning_rate": 4.695652173913044e-05, "loss": 0.6617, "step": 60 }, { "epoch": 0.18205461638491546, "grad_norm": 0.04189423844218254, "learning_rate": 4.5507246376811595e-05, "loss": 0.7254, "step": 70 }, { "epoch": 0.20806241872561768, "grad_norm": 0.033223457634449005, "learning_rate": 4.405797101449275e-05, "loss": 0.7454, "step": 80 }, { "epoch": 0.2340702210663199, "grad_norm": 0.023022688925266266, "learning_rate": 4.2608695652173916e-05, "loss": 0.7263, "step": 90 }, { "epoch": 0.26007802340702213, "grad_norm": 0.1517011970281601, "learning_rate": 4.115942028985507e-05, "loss": 0.7241, "step": 100 }, { "epoch": 0.28608582574772434, "grad_norm": 0.041623640805482864, "learning_rate": 3.971014492753624e-05, "loss": 0.647, "step": 110 }, { "epoch": 0.31209362808842656, "grad_norm": 0.03412195295095444, "learning_rate": 3.8260869565217395e-05, "loss": 0.6991, "step": 120 }, { "epoch": 0.3381014304291287, "grad_norm": 0.02426602691411972, "learning_rate": 3.681159420289855e-05, "loss": 0.7115, "step": 130 }, { "epoch": 0.3641092327698309, "grad_norm": 0.023634808138012886, "learning_rate": 3.536231884057971e-05, "loss": 0.6992, "step": 140 }, { "epoch": 0.39011703511053314, "grad_norm": 0.1857312172651291, "learning_rate": 3.3913043478260867e-05, "loss": 0.7133, "step": 150 }, { "epoch": 0.41612483745123535, "grad_norm": 0.057914506644010544, "learning_rate": 3.246376811594203e-05, "loss": 0.637, "step": 160 }, { "epoch": 0.44213263979193757, "grad_norm": 0.0314478725194931, "learning_rate": 3.1014492753623195e-05, "loss": 0.69, "step": 170 }, { "epoch": 0.4681404421326398, "grad_norm": 0.02375701256096363, "learning_rate": 2.9565217391304352e-05, "loss": 0.7052, "step": 180 }, { "epoch": 0.494148244473342, "grad_norm": 0.017046812921762466, "learning_rate": 2.811594202898551e-05, "loss": 0.6963, "step": 190 }, { "epoch": 0.5201560468140443, "grad_norm": 0.14757999777793884, "learning_rate": 2.6666666666666667e-05, "loss": 0.699, "step": 200 }, { "epoch": 0.5461638491547465, "grad_norm": 0.03953570872545242, "learning_rate": 2.5217391304347827e-05, "loss": 0.6362, "step": 210 }, { "epoch": 0.5721716514954487, "grad_norm": 0.031761154532432556, "learning_rate": 2.3768115942028988e-05, "loss": 0.6929, "step": 220 }, { "epoch": 0.5981794538361509, "grad_norm": 0.019830092787742615, "learning_rate": 2.2318840579710145e-05, "loss": 0.6936, "step": 230 }, { "epoch": 0.6241872561768531, "grad_norm": 0.017688650637865067, "learning_rate": 2.0869565217391303e-05, "loss": 0.692, "step": 240 }, { "epoch": 0.6501950585175552, "grad_norm": 0.18702688813209534, "learning_rate": 1.9420289855072467e-05, "loss": 0.7103, "step": 250 }, { "epoch": 0.6762028608582574, "grad_norm": 0.03623680770397186, "learning_rate": 1.7971014492753624e-05, "loss": 0.6185, "step": 260 }, { "epoch": 0.7022106631989596, "grad_norm": 0.026319777593016624, "learning_rate": 1.652173913043478e-05, "loss": 0.7065, "step": 270 }, { "epoch": 0.7282184655396619, "grad_norm": 0.018396981060504913, "learning_rate": 1.5072463768115944e-05, "loss": 0.6869, "step": 280 }, { "epoch": 0.7542262678803641, "grad_norm": 0.016413649544119835, "learning_rate": 1.3623188405797103e-05, "loss": 0.6865, "step": 290 }, { "epoch": 0.7802340702210663, "grad_norm": 0.1341114193201065, "learning_rate": 1.2173913043478261e-05, "loss": 0.7022, "step": 300 }, { "epoch": 0.8062418725617685, "grad_norm": 0.03741007670760155, "learning_rate": 1.072463768115942e-05, "loss": 0.6272, "step": 310 }, { "epoch": 0.8322496749024707, "grad_norm": 0.024399157613515854, "learning_rate": 9.27536231884058e-06, "loss": 0.6793, "step": 320 }, { "epoch": 0.8582574772431729, "grad_norm": 0.016972342506051064, "learning_rate": 7.82608695652174e-06, "loss": 0.7078, "step": 330 }, { "epoch": 0.8842652795838751, "grad_norm": 0.014587855897843838, "learning_rate": 6.376811594202898e-06, "loss": 0.7041, "step": 340 }, { "epoch": 0.9102730819245773, "grad_norm": 0.13855686783790588, "learning_rate": 4.927536231884058e-06, "loss": 0.6831, "step": 350 }, { "epoch": 0.9362808842652796, "grad_norm": 0.03484239801764488, "learning_rate": 3.4782608695652175e-06, "loss": 0.6321, "step": 360 }, { "epoch": 0.9622886866059818, "grad_norm": 0.022825093939900398, "learning_rate": 2.028985507246377e-06, "loss": 0.6889, "step": 370 }, { "epoch": 0.988296488946684, "grad_norm": 0.019488025456666946, "learning_rate": 5.797101449275362e-07, "loss": 0.6797, "step": 380 } ], "logging_steps": 10, "max_steps": 384, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.415557240450187e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }