| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 269, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01858736059479554, |
| "grad_norm": 1.7745643057657652, |
| "learning_rate": 1.785714285714286e-05, |
| "loss": 0.8465, |
| "num_tokens": 10465392.0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03717472118959108, |
| "grad_norm": 0.7774883299140104, |
| "learning_rate": 3.571428571428572e-05, |
| "loss": 0.7778, |
| "num_tokens": 20951152.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.055762081784386616, |
| "grad_norm": 0.4887795745895529, |
| "learning_rate": 4.9998292477583695e-05, |
| "loss": 0.7095, |
| "num_tokens": 31436912.0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07434944237918216, |
| "grad_norm": 0.4048620465005431, |
| "learning_rate": 4.993855640118024e-05, |
| "loss": 0.6711, |
| "num_tokens": 41909269.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09293680297397769, |
| "grad_norm": 0.3700973048111578, |
| "learning_rate": 4.9793703194560106e-05, |
| "loss": 0.652, |
| "num_tokens": 52353719.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11152416356877323, |
| "grad_norm": 0.2591810692769517, |
| "learning_rate": 4.9564282335552e-05, |
| "loss": 0.6366, |
| "num_tokens": 62792124.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13011152416356878, |
| "grad_norm": 0.25627868811652355, |
| "learning_rate": 4.9251164096056716e-05, |
| "loss": 0.6219, |
| "num_tokens": 73260442.0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.14869888475836432, |
| "grad_norm": 0.24377676889228242, |
| "learning_rate": 4.885553624080778e-05, |
| "loss": 0.6054, |
| "num_tokens": 83727556.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16728624535315986, |
| "grad_norm": 0.25406462587460105, |
| "learning_rate": 4.8378899521772935e-05, |
| "loss": 0.6043, |
| "num_tokens": 94201938.0, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.18587360594795538, |
| "grad_norm": 0.29469855378477344, |
| "learning_rate": 4.78230619852879e-05, |
| "loss": 0.5968, |
| "num_tokens": 104670740.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.20446096654275092, |
| "grad_norm": 0.2872741452007558, |
| "learning_rate": 4.719013211351733e-05, |
| "loss": 0.5998, |
| "num_tokens": 115151933.0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.22304832713754646, |
| "grad_norm": 0.35994881708559934, |
| "learning_rate": 4.648251082625975e-05, |
| "loss": 0.5906, |
| "num_tokens": 125607410.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.241635687732342, |
| "grad_norm": 0.3685132191710582, |
| "learning_rate": 4.570288237343632e-05, |
| "loss": 0.5882, |
| "num_tokens": 136036466.0, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.26022304832713755, |
| "grad_norm": 0.3543208991287171, |
| "learning_rate": 4.4854204152811567e-05, |
| "loss": 0.5849, |
| "num_tokens": 146502251.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2788104089219331, |
| "grad_norm": 0.32978990735476776, |
| "learning_rate": 4.39396954915706e-05, |
| "loss": 0.5822, |
| "num_tokens": 156977308.0, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.29739776951672864, |
| "grad_norm": 0.24954573936489757, |
| "learning_rate": 4.2962825434308415e-05, |
| "loss": 0.5761, |
| "num_tokens": 167423794.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3159851301115242, |
| "grad_norm": 0.2927528555691777, |
| "learning_rate": 4.1927299583755515e-05, |
| "loss": 0.5685, |
| "num_tokens": 177900466.0, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.3345724907063197, |
| "grad_norm": 0.2800523214277643, |
| "learning_rate": 4.083704604415748e-05, |
| "loss": 0.5725, |
| "num_tokens": 188373813.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.35315985130111527, |
| "grad_norm": 0.2567949727132378, |
| "learning_rate": 3.969620052063012e-05, |
| "loss": 0.5707, |
| "num_tokens": 198859382.0, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.37174721189591076, |
| "grad_norm": 0.2930993202940322, |
| "learning_rate": 3.850909063101328e-05, |
| "loss": 0.5676, |
| "num_tokens": 209345142.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3903345724907063, |
| "grad_norm": 0.2705016112461222, |
| "learning_rate": 3.728021948973421e-05, |
| "loss": 0.5668, |
| "num_tokens": 219820636.0, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.40892193308550184, |
| "grad_norm": 0.26900394198529787, |
| "learning_rate": 3.6014248625951984e-05, |
| "loss": 0.559, |
| "num_tokens": 230306396.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4275092936802974, |
| "grad_norm": 0.3105968286768211, |
| "learning_rate": 3.4715980300780745e-05, |
| "loss": 0.5627, |
| "num_tokens": 240792156.0, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.44609665427509293, |
| "grad_norm": 0.2794347051102847, |
| "learning_rate": 3.339033929066841e-05, |
| "loss": 0.5611, |
| "num_tokens": 251259365.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4646840148698885, |
| "grad_norm": 0.2774895900629833, |
| "learning_rate": 3.204235420603273e-05, |
| "loss": 0.5644, |
| "num_tokens": 261736444.0, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.483271375464684, |
| "grad_norm": 0.2664390627857969, |
| "learning_rate": 3.0677138416019556e-05, |
| "loss": 0.563, |
| "num_tokens": 272205866.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5018587360594795, |
| "grad_norm": 0.23913804982394823, |
| "learning_rate": 2.9299870651742188e-05, |
| "loss": 0.5587, |
| "num_tokens": 282683327.0, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5204460966542751, |
| "grad_norm": 0.22843043596249354, |
| "learning_rate": 2.7915775361580428e-05, |
| "loss": 0.5612, |
| "num_tokens": 293122806.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5390334572490706, |
| "grad_norm": 0.1851400693793969, |
| "learning_rate": 2.6530102893058357e-05, |
| "loss": 0.5605, |
| "num_tokens": 303598947.0, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5576208178438662, |
| "grad_norm": 0.23559825842595322, |
| "learning_rate": 2.5148109576477802e-05, |
| "loss": 0.5579, |
| "num_tokens": 314077641.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5762081784386617, |
| "grad_norm": 0.1902236485150502, |
| "learning_rate": 2.3775037785857073e-05, |
| "loss": 0.5488, |
| "num_tokens": 324545386.0, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5947955390334573, |
| "grad_norm": 0.17385694526894205, |
| "learning_rate": 2.2416096052810688e-05, |
| "loss": 0.5609, |
| "num_tokens": 335031146.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6133828996282528, |
| "grad_norm": 0.19869286588405813, |
| "learning_rate": 2.1076439308804808e-05, |
| "loss": 0.5411, |
| "num_tokens": 345504904.0, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.6319702602230484, |
| "grad_norm": 0.1727497860741924, |
| "learning_rate": 1.976114933073662e-05, |
| "loss": 0.5507, |
| "num_tokens": 355956520.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6505576208178439, |
| "grad_norm": 0.16204945679669835, |
| "learning_rate": 1.847521546401383e-05, |
| "loss": 0.5514, |
| "num_tokens": 366434348.0, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6691449814126395, |
| "grad_norm": 0.18545031117275818, |
| "learning_rate": 1.7223515696258592e-05, |
| "loss": 0.5543, |
| "num_tokens": 376908695.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6877323420074349, |
| "grad_norm": 0.18033324985725832, |
| "learning_rate": 1.60107981534296e-05, |
| "loss": 0.5571, |
| "num_tokens": 387384301.0, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.7063197026022305, |
| "grad_norm": 0.16333455921070916, |
| "learning_rate": 1.484166308855398e-05, |
| "loss": 0.5479, |
| "num_tokens": 397835323.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.724907063197026, |
| "grad_norm": 0.1589171492236666, |
| "learning_rate": 1.372054543139188e-05, |
| "loss": 0.5467, |
| "num_tokens": 408298322.0, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.7434944237918215, |
| "grad_norm": 0.1513935721991138, |
| "learning_rate": 1.2651697965228748e-05, |
| "loss": 0.5525, |
| "num_tokens": 418781566.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7620817843866171, |
| "grad_norm": 0.16287890792055218, |
| "learning_rate": 1.1639175194611693e-05, |
| "loss": 0.5484, |
| "num_tokens": 429246222.0, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7806691449814126, |
| "grad_norm": 0.16086390738211148, |
| "learning_rate": 1.0686817965224952e-05, |
| "loss": 0.5425, |
| "num_tokens": 439717644.0, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7992565055762082, |
| "grad_norm": 0.15119980861622911, |
| "learning_rate": 9.798238894246628e-06, |
| "loss": 0.5517, |
| "num_tokens": 450170885.0, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.8178438661710037, |
| "grad_norm": 0.1708174952390217, |
| "learning_rate": 8.976808666454292e-06, |
| "loss": 0.5477, |
| "num_tokens": 460656645.0, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8364312267657993, |
| "grad_norm": 0.16470071988717208, |
| "learning_rate": 8.225643248063091e-06, |
| "loss": 0.5399, |
| "num_tokens": 471142405.0, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8550185873605948, |
| "grad_norm": 0.13464679088750628, |
| "learning_rate": 7.547592066798609e-06, |
| "loss": 0.5443, |
| "num_tokens": 481616409.0, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8736059479553904, |
| "grad_norm": 0.15726989595579502, |
| "learning_rate": 6.94522720304148e-06, |
| "loss": 0.5435, |
| "num_tokens": 492097674.0, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8921933085501859, |
| "grad_norm": 0.14241839012356916, |
| "learning_rate": 6.420833633045514e-06, |
| "loss": 0.5413, |
| "num_tokens": 502555042.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9107806691449815, |
| "grad_norm": 0.14182956097827554, |
| "learning_rate": 5.976400561240085e-06, |
| "loss": 0.5393, |
| "num_tokens": 513031521.0, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.929368029739777, |
| "grad_norm": 0.137113547383338, |
| "learning_rate": 5.613613874496393e-06, |
| "loss": 0.5475, |
| "num_tokens": 523517281.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9479553903345725, |
| "grad_norm": 0.14289355305602414, |
| "learning_rate": 5.333849746981104e-06, |
| "loss": 0.5439, |
| "num_tokens": 533998146.0, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.966542750929368, |
| "grad_norm": 0.13225422693426547, |
| "learning_rate": 5.138169419856345e-06, |
| "loss": 0.5399, |
| "num_tokens": 544470561.0, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9851301115241635, |
| "grad_norm": 0.1415248161046304, |
| "learning_rate": 5.027315175628478e-06, |
| "loss": 0.5362, |
| "num_tokens": 554934577.0, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.0, |
| "num_tokens": 562274609.0, |
| "step": 269, |
| "total_flos": 490444463013888.0, |
| "train_loss": 0.5787177183371051, |
| "train_runtime": 11570.0559, |
| "train_samples_per_second": 2.965, |
| "train_steps_per_second": 0.023 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 269, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 490444463013888.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|