{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9986996098829649,
  "eval_steps": 500,
  "global_step": 384,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002600780234070221,
      "grad_norm": 0.02962934412062168,
      "learning_rate": 1.282051282051282e-06,
      "loss": 0.619,
      "step": 1
    },
    {
      "epoch": 0.02600780234070221,
      "grad_norm": 0.06379027664661407,
      "learning_rate": 1.282051282051282e-05,
      "loss": 0.6962,
      "step": 10
    },
    {
      "epoch": 0.05201560468140442,
      "grad_norm": 0.0363883376121521,
      "learning_rate": 2.564102564102564e-05,
      "loss": 0.7759,
      "step": 20
    },
    {
      "epoch": 0.07802340702210664,
      "grad_norm": 0.03419478237628937,
      "learning_rate": 3.846153846153846e-05,
      "loss": 0.8087,
      "step": 30
    },
    {
      "epoch": 0.10403120936280884,
      "grad_norm": 0.04424262419342995,
      "learning_rate": 4.985507246376812e-05,
      "loss": 0.7775,
      "step": 40
    },
    {
      "epoch": 0.13003901170351106,
      "grad_norm": 0.22272075712680817,
      "learning_rate": 4.840579710144928e-05,
      "loss": 0.7476,
      "step": 50
    },
    {
      "epoch": 0.15604681404421328,
      "grad_norm": 0.049193304032087326,
      "learning_rate": 4.695652173913044e-05,
      "loss": 0.6617,
      "step": 60
    },
    {
      "epoch": 0.18205461638491546,
      "grad_norm": 0.04189423844218254,
      "learning_rate": 4.5507246376811595e-05,
      "loss": 0.7254,
      "step": 70
    },
    {
      "epoch": 0.20806241872561768,
      "grad_norm": 0.033223457634449005,
      "learning_rate": 4.405797101449275e-05,
      "loss": 0.7454,
      "step": 80
    },
    {
      "epoch": 0.2340702210663199,
      "grad_norm": 0.023022688925266266,
      "learning_rate": 4.2608695652173916e-05,
      "loss": 0.7263,
      "step": 90
    },
    {
      "epoch": 0.26007802340702213,
      "grad_norm": 0.1517011970281601,
      "learning_rate": 4.115942028985507e-05,
      "loss": 0.7241,
      "step": 100
    },
    {
      "epoch": 0.28608582574772434,
      "grad_norm": 0.041623640805482864,
      "learning_rate": 3.971014492753624e-05,
      "loss": 0.647,
      "step": 110
    },
    {
      "epoch": 0.31209362808842656,
      "grad_norm": 0.03412195295095444,
      "learning_rate": 3.8260869565217395e-05,
      "loss": 0.6991,
      "step": 120
    },
    {
      "epoch": 0.3381014304291287,
      "grad_norm": 0.02426602691411972,
      "learning_rate": 3.681159420289855e-05,
      "loss": 0.7115,
      "step": 130
    },
    {
      "epoch": 0.3641092327698309,
      "grad_norm": 0.023634808138012886,
      "learning_rate": 3.536231884057971e-05,
      "loss": 0.6992,
      "step": 140
    },
    {
      "epoch": 0.39011703511053314,
      "grad_norm": 0.1857312172651291,
      "learning_rate": 3.3913043478260867e-05,
      "loss": 0.7133,
      "step": 150
    },
    {
      "epoch": 0.41612483745123535,
      "grad_norm": 0.057914506644010544,
      "learning_rate": 3.246376811594203e-05,
      "loss": 0.637,
      "step": 160
    },
    {
      "epoch": 0.44213263979193757,
      "grad_norm": 0.0314478725194931,
      "learning_rate": 3.1014492753623195e-05,
      "loss": 0.69,
      "step": 170
    },
    {
      "epoch": 0.4681404421326398,
      "grad_norm": 0.02375701256096363,
      "learning_rate": 2.9565217391304352e-05,
      "loss": 0.7052,
      "step": 180
    },
    {
      "epoch": 0.494148244473342,
      "grad_norm": 0.017046812921762466,
      "learning_rate": 2.811594202898551e-05,
      "loss": 0.6963,
      "step": 190
    },
    {
      "epoch": 0.5201560468140443,
      "grad_norm": 0.14757999777793884,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 0.699,
      "step": 200
    },
    {
      "epoch": 0.5461638491547465,
      "grad_norm": 0.03953570872545242,
      "learning_rate": 2.5217391304347827e-05,
      "loss": 0.6362,
      "step": 210
    },
    {
      "epoch": 0.5721716514954487,
      "grad_norm": 0.031761154532432556,
      "learning_rate": 2.3768115942028988e-05,
      "loss": 0.6929,
      "step": 220
    },
    {
      "epoch": 0.5981794538361509,
      "grad_norm": 0.019830092787742615,
      "learning_rate": 2.2318840579710145e-05,
      "loss": 0.6936,
      "step": 230
    },
    {
      "epoch": 0.6241872561768531,
      "grad_norm": 0.017688650637865067,
      "learning_rate": 2.0869565217391303e-05,
      "loss": 0.692,
      "step": 240
    },
    {
      "epoch": 0.6501950585175552,
      "grad_norm": 0.18702688813209534,
      "learning_rate": 1.9420289855072467e-05,
      "loss": 0.7103,
      "step": 250
    },
    {
      "epoch": 0.6762028608582574,
      "grad_norm": 0.03623680770397186,
      "learning_rate": 1.7971014492753624e-05,
      "loss": 0.6185,
      "step": 260
    },
    {
      "epoch": 0.7022106631989596,
      "grad_norm": 0.026319777593016624,
      "learning_rate": 1.652173913043478e-05,
      "loss": 0.7065,
      "step": 270
    },
    {
      "epoch": 0.7282184655396619,
      "grad_norm": 0.018396981060504913,
      "learning_rate": 1.5072463768115944e-05,
      "loss": 0.6869,
      "step": 280
    },
    {
      "epoch": 0.7542262678803641,
      "grad_norm": 0.016413649544119835,
      "learning_rate": 1.3623188405797103e-05,
      "loss": 0.6865,
      "step": 290
    },
    {
      "epoch": 0.7802340702210663,
      "grad_norm": 0.1341114193201065,
      "learning_rate": 1.2173913043478261e-05,
      "loss": 0.7022,
      "step": 300
    },
    {
      "epoch": 0.8062418725617685,
      "grad_norm": 0.03741007670760155,
      "learning_rate": 1.072463768115942e-05,
      "loss": 0.6272,
      "step": 310
    },
    {
      "epoch": 0.8322496749024707,
      "grad_norm": 0.024399157613515854,
      "learning_rate": 9.27536231884058e-06,
      "loss": 0.6793,
      "step": 320
    },
    {
      "epoch": 0.8582574772431729,
      "grad_norm": 0.016972342506051064,
      "learning_rate": 7.82608695652174e-06,
      "loss": 0.7078,
      "step": 330
    },
    {
      "epoch": 0.8842652795838751,
      "grad_norm": 0.014587855897843838,
      "learning_rate": 6.376811594202898e-06,
      "loss": 0.7041,
      "step": 340
    },
    {
      "epoch": 0.9102730819245773,
      "grad_norm": 0.13855686783790588,
      "learning_rate": 4.927536231884058e-06,
      "loss": 0.6831,
      "step": 350
    },
    {
      "epoch": 0.9362808842652796,
      "grad_norm": 0.03484239801764488,
      "learning_rate": 3.4782608695652175e-06,
      "loss": 0.6321,
      "step": 360
    },
    {
      "epoch": 0.9622886866059818,
      "grad_norm": 0.022825093939900398,
      "learning_rate": 2.028985507246377e-06,
      "loss": 0.6889,
      "step": 370
    },
    {
      "epoch": 0.988296488946684,
      "grad_norm": 0.019488025456666946,
      "learning_rate": 5.797101449275362e-07,
      "loss": 0.6797,
      "step": 380
    }
  ],
  "logging_steps": 10,
  "max_steps": 384,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.415557240450187e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}