{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 581, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01721170395869191, "grad_norm": 3.358337640762329, "learning_rate": 7.627118644067798e-07, "loss": 0.23148756027221679, "step": 10 }, { "epoch": 0.03442340791738382, "grad_norm": 1.5195997953414917, "learning_rate": 1.6101694915254237e-06, "loss": 0.18045387268066407, "step": 20 }, { "epoch": 0.05163511187607573, "grad_norm": 1.4757816791534424, "learning_rate": 2.457627118644068e-06, "loss": 0.13096144199371337, "step": 30 }, { "epoch": 0.06884681583476764, "grad_norm": 0.8429168462753296, "learning_rate": 3.305084745762712e-06, "loss": 0.1102510929107666, "step": 40 }, { "epoch": 0.08605851979345955, "grad_norm": 0.7288976311683655, "learning_rate": 4.152542372881356e-06, "loss": 0.12253910303115845, "step": 50 }, { "epoch": 0.10327022375215146, "grad_norm": 0.8172044157981873, "learning_rate": 5e-06, "loss": 0.1122509241104126, "step": 60 }, { "epoch": 0.12048192771084337, "grad_norm": 0.9126996397972107, "learning_rate": 4.995473759140656e-06, "loss": 0.10185965299606323, "step": 70 }, { "epoch": 0.13769363166953527, "grad_norm": 0.9686183929443359, "learning_rate": 4.981911426047672e-06, "loss": 0.11461906433105469, "step": 80 }, { "epoch": 0.1549053356282272, "grad_norm": 0.822040855884552, "learning_rate": 4.959362109830007e-06, "loss": 0.09727483391761779, "step": 90 }, { "epoch": 0.1721170395869191, "grad_norm": 0.7876383066177368, "learning_rate": 4.9279074613967915e-06, "loss": 0.10690455436706543, "step": 100 }, { "epoch": 0.18932874354561102, "grad_norm": 0.6134992837905884, "learning_rate": 4.887661377799989e-06, "loss": 0.10446040630340576, "step": 110 }, { "epoch": 0.20654044750430292, "grad_norm": 0.5966605544090271, "learning_rate": 4.838769589814003e-06, "loss": 0.10144654512405396, "step": 120 }, { "epoch": 0.22375215146299485, "grad_norm": 0.6216921210289001, "learning_rate": 4.781409134245608e-06, "loss": 0.10764830112457276, "step": 130 }, { "epoch": 0.24096385542168675, "grad_norm": 0.7262535095214844, "learning_rate": 4.715787712884969e-06, "loss": 0.1007878303527832, "step": 140 }, { "epoch": 0.25817555938037867, "grad_norm": 0.7887536883354187, "learning_rate": 4.642142940418973e-06, "loss": 0.10809539556503296, "step": 150 }, { "epoch": 0.27538726333907054, "grad_norm": 0.9238594174385071, "learning_rate": 4.560741484030192e-06, "loss": 0.10565640926361083, "step": 160 }, { "epoch": 0.29259896729776247, "grad_norm": 0.9821227192878723, "learning_rate": 4.471878097796958e-06, "loss": 0.1082463026046753, "step": 170 }, { "epoch": 0.3098106712564544, "grad_norm": 0.9219503998756409, "learning_rate": 4.3758745553910065e-06, "loss": 0.1083601474761963, "step": 180 }, { "epoch": 0.3270223752151463, "grad_norm": 0.8320638537406921, "learning_rate": 4.2730784849373615e-06, "loss": 0.11396574974060059, "step": 190 }, { "epoch": 0.3442340791738382, "grad_norm": 0.49399641156196594, "learning_rate": 4.163862110255436e-06, "loss": 0.10837725400924683, "step": 200 }, { "epoch": 0.3614457831325301, "grad_norm": 0.6740733981132507, "learning_rate": 4.048620903039308e-06, "loss": 0.10683045387268067, "step": 210 }, { "epoch": 0.37865748709122204, "grad_norm": 1.210553765296936, "learning_rate": 3.927772150857601e-06, "loss": 0.11184821128845215, "step": 220 }, { "epoch": 0.3958691910499139, "grad_norm": 0.8200420141220093, "learning_rate": 3.8017534461582563e-06, "loss": 0.10036113262176513, "step": 230 }, { "epoch": 0.41308089500860584, "grad_norm": 0.533598005771637, "learning_rate": 3.671021101749476e-06, "loss": 0.10928145647048951, "step": 240 }, { "epoch": 0.43029259896729777, "grad_norm": 0.6937351822853088, "learning_rate": 3.5360484984943797e-06, "loss": 0.10983037948608398, "step": 250 }, { "epoch": 0.4475043029259897, "grad_norm": 0.8502167463302612, "learning_rate": 3.397324371202365e-06, "loss": 0.10992729663848877, "step": 260 }, { "epoch": 0.46471600688468157, "grad_norm": 0.7206671237945557, "learning_rate": 3.255351038923932e-06, "loss": 0.10280261039733887, "step": 270 }, { "epoch": 0.4819277108433735, "grad_norm": 0.7110823392868042, "learning_rate": 3.110642586057078e-06, "loss": 0.1118739128112793, "step": 280 }, { "epoch": 0.4991394148020654, "grad_norm": 0.6398674249649048, "learning_rate": 2.9637230008514495e-06, "loss": 0.10549364089965821, "step": 290 }, { "epoch": 0.5163511187607573, "grad_norm": 1.137890338897705, "learning_rate": 2.815124278050724e-06, "loss": 0.10759081840515136, "step": 300 }, { "epoch": 0.5335628227194492, "grad_norm": 0.7178072929382324, "learning_rate": 2.6653844925435296e-06, "loss": 0.1023525357246399, "step": 310 }, { "epoch": 0.5507745266781411, "grad_norm": 0.5058847069740295, "learning_rate": 2.515045850998214e-06, "loss": 0.10535409450531005, "step": 320 }, { "epoch": 0.5679862306368331, "grad_norm": 0.9442250728607178, "learning_rate": 2.3646527285364565e-06, "loss": 0.10064098834991456, "step": 330 }, { "epoch": 0.5851979345955249, "grad_norm": 0.6629244089126587, "learning_rate": 2.2147496975549386e-06, "loss": 0.10554497241973877, "step": 340 }, { "epoch": 0.6024096385542169, "grad_norm": 0.7381961345672607, "learning_rate": 2.0658795558326745e-06, "loss": 0.1006193995475769, "step": 350 }, { "epoch": 0.6196213425129088, "grad_norm": 0.7213409543037415, "learning_rate": 1.9185813610642248e-06, "loss": 0.10112185478210449, "step": 360 }, { "epoch": 0.6368330464716007, "grad_norm": 1.0835481882095337, "learning_rate": 1.7733884789357228e-06, "loss": 0.10866096019744872, "step": 370 }, { "epoch": 0.6540447504302926, "grad_norm": 0.8973684906959534, "learning_rate": 1.6308266518116318e-06, "loss": 0.10433266162872315, "step": 380 }, { "epoch": 0.6712564543889845, "grad_norm": 0.7562016248703003, "learning_rate": 1.491412095025479e-06, "loss": 0.10279223918914795, "step": 390 }, { "epoch": 0.6884681583476764, "grad_norm": 0.9921433329582214, "learning_rate": 1.3556496276679165e-06, "loss": 0.09440263509750366, "step": 400 }, { "epoch": 0.7056798623063684, "grad_norm": 0.9576047658920288, "learning_rate": 1.2240308446404796e-06, "loss": 0.0905891478061676, "step": 410 }, { "epoch": 0.7228915662650602, "grad_norm": 0.8586844205856323, "learning_rate": 1.0970323365940443e-06, "loss": 0.10087704658508301, "step": 420 }, { "epoch": 0.7401032702237521, "grad_norm": 0.6407778859138489, "learning_rate": 9.751139641975681e-07, "loss": 0.10743443965911866, "step": 430 }, { "epoch": 0.7573149741824441, "grad_norm": 0.6808867454528809, "learning_rate": 8.587171929859667e-07, "loss": 0.09688538312911987, "step": 440 }, { "epoch": 0.774526678141136, "grad_norm": 0.9380479454994202, "learning_rate": 7.482634948166442e-07, "loss": 0.10448521375656128, "step": 450 }, { "epoch": 0.7917383820998278, "grad_norm": 0.5422897934913635, "learning_rate": 6.441528217229753e-07, "loss": 0.09819400906562806, "step": 460 }, { "epoch": 0.8089500860585198, "grad_norm": 0.6771324872970581, "learning_rate": 5.467621576909219e-07, "loss": 0.10061649084091187, "step": 470 }, { "epoch": 0.8261617900172117, "grad_norm": 1.1676146984100342, "learning_rate": 4.5644415360277057e-07, "loss": 0.10219871997833252, "step": 480 }, { "epoch": 0.8433734939759037, "grad_norm": 0.8499922156333923, "learning_rate": 3.7352585029087367e-07, "loss": 0.09681237936019897, "step": 490 }, { "epoch": 0.8605851979345955, "grad_norm": 0.6682608723640442, "learning_rate": 2.983074943251807e-07, "loss": 0.08911526203155518, "step": 500 }, { "epoch": 0.8777969018932874, "grad_norm": 0.854381263256073, "learning_rate": 2.3106145082260777e-07, "loss": 0.10029389858245849, "step": 510 }, { "epoch": 0.8950086058519794, "grad_norm": 1.097908616065979, "learning_rate": 1.7203121721493883e-07, "loss": 0.10202760696411133, "step": 520 }, { "epoch": 0.9122203098106713, "grad_norm": 0.7215556502342224, "learning_rate": 1.214305415464076e-07, "loss": 0.09217539429664612, "step": 530 }, { "epoch": 0.9294320137693631, "grad_norm": 0.6004782319068909, "learning_rate": 7.944264849359173e-08, "loss": 0.09551474452018738, "step": 540 }, { "epoch": 0.9466437177280551, "grad_norm": 0.7537798285484314, "learning_rate": 4.6219575910197153e-08, "loss": 0.09391357898712158, "step": 550 }, { "epoch": 0.963855421686747, "grad_norm": 0.7966172695159912, "learning_rate": 2.188162429910462e-08, "loss": 0.10805587768554688, "step": 560 }, { "epoch": 0.9810671256454389, "grad_norm": 0.7142431139945984, "learning_rate": 6.516921205125537e-09, "loss": 0.1027875542640686, "step": 570 }, { "epoch": 0.9982788296041308, "grad_norm": 0.4579184055328369, "learning_rate": 1.8110210580846166e-10, "loss": 0.09767600893974304, "step": 580 } ], "logging_steps": 10, "max_steps": 581, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1551053421805568.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }