| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 581, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01721170395869191, |
| "grad_norm": 3.358337640762329, |
| "learning_rate": 7.627118644067798e-07, |
| "loss": 0.23148756027221679, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03442340791738382, |
| "grad_norm": 1.5195997953414917, |
| "learning_rate": 1.6101694915254237e-06, |
| "loss": 0.18045387268066407, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05163511187607573, |
| "grad_norm": 1.4757816791534424, |
| "learning_rate": 2.457627118644068e-06, |
| "loss": 0.13096144199371337, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06884681583476764, |
| "grad_norm": 0.8429168462753296, |
| "learning_rate": 3.305084745762712e-06, |
| "loss": 0.1102510929107666, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08605851979345955, |
| "grad_norm": 0.7288976311683655, |
| "learning_rate": 4.152542372881356e-06, |
| "loss": 0.12253910303115845, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10327022375215146, |
| "grad_norm": 0.8172044157981873, |
| "learning_rate": 5e-06, |
| "loss": 0.1122509241104126, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12048192771084337, |
| "grad_norm": 0.9126996397972107, |
| "learning_rate": 4.995473759140656e-06, |
| "loss": 0.10185965299606323, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.13769363166953527, |
| "grad_norm": 0.9686183929443359, |
| "learning_rate": 4.981911426047672e-06, |
| "loss": 0.11461906433105469, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1549053356282272, |
| "grad_norm": 0.822040855884552, |
| "learning_rate": 4.959362109830007e-06, |
| "loss": 0.09727483391761779, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1721170395869191, |
| "grad_norm": 0.7876383066177368, |
| "learning_rate": 4.9279074613967915e-06, |
| "loss": 0.10690455436706543, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18932874354561102, |
| "grad_norm": 0.6134992837905884, |
| "learning_rate": 4.887661377799989e-06, |
| "loss": 0.10446040630340576, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.20654044750430292, |
| "grad_norm": 0.5966605544090271, |
| "learning_rate": 4.838769589814003e-06, |
| "loss": 0.10144654512405396, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.22375215146299485, |
| "grad_norm": 0.6216921210289001, |
| "learning_rate": 4.781409134245608e-06, |
| "loss": 0.10764830112457276, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.24096385542168675, |
| "grad_norm": 0.7262535095214844, |
| "learning_rate": 4.715787712884969e-06, |
| "loss": 0.1007878303527832, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.25817555938037867, |
| "grad_norm": 0.7887536883354187, |
| "learning_rate": 4.642142940418973e-06, |
| "loss": 0.10809539556503296, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.27538726333907054, |
| "grad_norm": 0.9238594174385071, |
| "learning_rate": 4.560741484030192e-06, |
| "loss": 0.10565640926361083, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.29259896729776247, |
| "grad_norm": 0.9821227192878723, |
| "learning_rate": 4.471878097796958e-06, |
| "loss": 0.1082463026046753, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3098106712564544, |
| "grad_norm": 0.9219503998756409, |
| "learning_rate": 4.3758745553910065e-06, |
| "loss": 0.1083601474761963, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3270223752151463, |
| "grad_norm": 0.8320638537406921, |
| "learning_rate": 4.2730784849373615e-06, |
| "loss": 0.11396574974060059, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3442340791738382, |
| "grad_norm": 0.49399641156196594, |
| "learning_rate": 4.163862110255436e-06, |
| "loss": 0.10837725400924683, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3614457831325301, |
| "grad_norm": 0.6740733981132507, |
| "learning_rate": 4.048620903039308e-06, |
| "loss": 0.10683045387268067, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.37865748709122204, |
| "grad_norm": 1.210553765296936, |
| "learning_rate": 3.927772150857601e-06, |
| "loss": 0.11184821128845215, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3958691910499139, |
| "grad_norm": 0.8200420141220093, |
| "learning_rate": 3.8017534461582563e-06, |
| "loss": 0.10036113262176513, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.41308089500860584, |
| "grad_norm": 0.533598005771637, |
| "learning_rate": 3.671021101749476e-06, |
| "loss": 0.10928145647048951, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.43029259896729777, |
| "grad_norm": 0.6937351822853088, |
| "learning_rate": 3.5360484984943797e-06, |
| "loss": 0.10983037948608398, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4475043029259897, |
| "grad_norm": 0.8502167463302612, |
| "learning_rate": 3.397324371202365e-06, |
| "loss": 0.10992729663848877, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.46471600688468157, |
| "grad_norm": 0.7206671237945557, |
| "learning_rate": 3.255351038923932e-06, |
| "loss": 0.10280261039733887, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 0.7110823392868042, |
| "learning_rate": 3.110642586057078e-06, |
| "loss": 0.1118739128112793, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4991394148020654, |
| "grad_norm": 0.6398674249649048, |
| "learning_rate": 2.9637230008514495e-06, |
| "loss": 0.10549364089965821, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5163511187607573, |
| "grad_norm": 1.137890338897705, |
| "learning_rate": 2.815124278050724e-06, |
| "loss": 0.10759081840515136, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5335628227194492, |
| "grad_norm": 0.7178072929382324, |
| "learning_rate": 2.6653844925435296e-06, |
| "loss": 0.1023525357246399, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5507745266781411, |
| "grad_norm": 0.5058847069740295, |
| "learning_rate": 2.515045850998214e-06, |
| "loss": 0.10535409450531005, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5679862306368331, |
| "grad_norm": 0.9442250728607178, |
| "learning_rate": 2.3646527285364565e-06, |
| "loss": 0.10064098834991456, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5851979345955249, |
| "grad_norm": 0.6629244089126587, |
| "learning_rate": 2.2147496975549386e-06, |
| "loss": 0.10554497241973877, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6024096385542169, |
| "grad_norm": 0.7381961345672607, |
| "learning_rate": 2.0658795558326745e-06, |
| "loss": 0.1006193995475769, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6196213425129088, |
| "grad_norm": 0.7213409543037415, |
| "learning_rate": 1.9185813610642248e-06, |
| "loss": 0.10112185478210449, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6368330464716007, |
| "grad_norm": 1.0835481882095337, |
| "learning_rate": 1.7733884789357228e-06, |
| "loss": 0.10866096019744872, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6540447504302926, |
| "grad_norm": 0.8973684906959534, |
| "learning_rate": 1.6308266518116318e-06, |
| "loss": 0.10433266162872315, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6712564543889845, |
| "grad_norm": 0.7562016248703003, |
| "learning_rate": 1.491412095025479e-06, |
| "loss": 0.10279223918914795, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6884681583476764, |
| "grad_norm": 0.9921433329582214, |
| "learning_rate": 1.3556496276679165e-06, |
| "loss": 0.09440263509750366, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7056798623063684, |
| "grad_norm": 0.9576047658920288, |
| "learning_rate": 1.2240308446404796e-06, |
| "loss": 0.0905891478061676, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7228915662650602, |
| "grad_norm": 0.8586844205856323, |
| "learning_rate": 1.0970323365940443e-06, |
| "loss": 0.10087704658508301, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7401032702237521, |
| "grad_norm": 0.6407778859138489, |
| "learning_rate": 9.751139641975681e-07, |
| "loss": 0.10743443965911866, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7573149741824441, |
| "grad_norm": 0.6808867454528809, |
| "learning_rate": 8.587171929859667e-07, |
| "loss": 0.09688538312911987, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.774526678141136, |
| "grad_norm": 0.9380479454994202, |
| "learning_rate": 7.482634948166442e-07, |
| "loss": 0.10448521375656128, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7917383820998278, |
| "grad_norm": 0.5422897934913635, |
| "learning_rate": 6.441528217229753e-07, |
| "loss": 0.09819400906562806, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8089500860585198, |
| "grad_norm": 0.6771324872970581, |
| "learning_rate": 5.467621576909219e-07, |
| "loss": 0.10061649084091187, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8261617900172117, |
| "grad_norm": 1.1676146984100342, |
| "learning_rate": 4.5644415360277057e-07, |
| "loss": 0.10219871997833252, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8433734939759037, |
| "grad_norm": 0.8499922156333923, |
| "learning_rate": 3.7352585029087367e-07, |
| "loss": 0.09681237936019897, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8605851979345955, |
| "grad_norm": 0.6682608723640442, |
| "learning_rate": 2.983074943251807e-07, |
| "loss": 0.08911526203155518, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8777969018932874, |
| "grad_norm": 0.854381263256073, |
| "learning_rate": 2.3106145082260777e-07, |
| "loss": 0.10029389858245849, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8950086058519794, |
| "grad_norm": 1.097908616065979, |
| "learning_rate": 1.7203121721493883e-07, |
| "loss": 0.10202760696411133, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9122203098106713, |
| "grad_norm": 0.7215556502342224, |
| "learning_rate": 1.214305415464076e-07, |
| "loss": 0.09217539429664612, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9294320137693631, |
| "grad_norm": 0.6004782319068909, |
| "learning_rate": 7.944264849359173e-08, |
| "loss": 0.09551474452018738, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9466437177280551, |
| "grad_norm": 0.7537798285484314, |
| "learning_rate": 4.6219575910197153e-08, |
| "loss": 0.09391357898712158, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 0.7966172695159912, |
| "learning_rate": 2.188162429910462e-08, |
| "loss": 0.10805587768554688, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.9810671256454389, |
| "grad_norm": 0.7142431139945984, |
| "learning_rate": 6.516921205125537e-09, |
| "loss": 0.1027875542640686, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9982788296041308, |
| "grad_norm": 0.4579184055328369, |
| "learning_rate": 1.8110210580846166e-10, |
| "loss": 0.09767600893974304, |
| "step": 580 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 581, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1551053421805568.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|