{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.761904761904763,
  "eval_steps": 500,
  "global_step": 390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12698412698412698,
      "grad_norm": 0.296919047832489,
      "learning_rate": 6.41025641025641e-06,
      "loss": 3.8313,
      "step": 5
    },
    {
      "epoch": 0.25396825396825395,
      "grad_norm": 0.33771219849586487,
      "learning_rate": 1.282051282051282e-05,
      "loss": 3.8502,
      "step": 10
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 0.41062507033348083,
      "learning_rate": 1.923076923076923e-05,
      "loss": 3.8537,
      "step": 15
    },
    {
      "epoch": 0.5079365079365079,
      "grad_norm": 0.5127325057983398,
      "learning_rate": 2.564102564102564e-05,
      "loss": 3.6949,
      "step": 20
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 0.5786504149436951,
      "learning_rate": 3.205128205128206e-05,
      "loss": 3.5371,
      "step": 25
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 0.6281372904777527,
      "learning_rate": 3.846153846153846e-05,
      "loss": 3.3822,
      "step": 30
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.677841305732727,
      "learning_rate": 4.4871794871794874e-05,
      "loss": 3.0836,
      "step": 35
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7423370480537415,
      "learning_rate": 4.999899863449631e-05,
      "loss": 2.5068,
      "step": 40
    },
    {
      "epoch": 1.126984126984127,
      "grad_norm": 0.6677795648574829,
      "learning_rate": 4.9963959264103544e-05,
      "loss": 1.9914,
      "step": 45
    },
    {
      "epoch": 1.253968253968254,
      "grad_norm": 0.7547211050987244,
      "learning_rate": 4.98789318082748e-05,
      "loss": 1.7329,
      "step": 50
    },
    {
      "epoch": 1.380952380952381,
      "grad_norm": 0.6714099645614624,
      "learning_rate": 4.974408652685072e-05,
      "loss": 1.4401,
      "step": 55
    },
    {
      "epoch": 1.507936507936508,
      "grad_norm": 0.6784941554069519,
      "learning_rate": 4.955969343539162e-05,
      "loss": 1.3861,
      "step": 60
    },
    {
      "epoch": 1.6349206349206349,
      "grad_norm": 0.8798362016677856,
      "learning_rate": 4.9326121764495596e-05,
      "loss": 1.1406,
      "step": 65
    },
    {
      "epoch": 1.7619047619047619,
      "grad_norm": 0.7543187737464905,
      "learning_rate": 4.90438392204474e-05,
      "loss": 1.2,
      "step": 70
    },
    {
      "epoch": 1.8888888888888888,
      "grad_norm": 0.7632691860198975,
      "learning_rate": 4.8713411048678635e-05,
      "loss": 1.1137,
      "step": 75
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.8194475173950195,
      "learning_rate": 4.83354989019146e-05,
      "loss": 0.8988,
      "step": 80
    },
    {
      "epoch": 2.126984126984127,
      "grad_norm": 0.5129795670509338,
      "learning_rate": 4.791085951527408e-05,
      "loss": 1.0144,
      "step": 85
    },
    {
      "epoch": 2.253968253968254,
      "grad_norm": 0.5384055972099304,
      "learning_rate": 4.744034319097535e-05,
      "loss": 0.9417,
      "step": 90
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 0.5197637677192688,
      "learning_rate": 4.692489209568234e-05,
      "loss": 0.9124,
      "step": 95
    },
    {
      "epoch": 2.507936507936508,
      "grad_norm": 0.5490031838417053,
      "learning_rate": 4.636553837390051e-05,
      "loss": 1.0367,
      "step": 100
    },
    {
      "epoch": 2.634920634920635,
      "grad_norm": 0.4497690796852112,
      "learning_rate": 4.5763402081200294e-05,
      "loss": 0.9391,
      "step": 105
    },
    {
      "epoch": 2.761904761904762,
      "grad_norm": 0.40242263674736023,
      "learning_rate": 4.511968894140639e-05,
      "loss": 0.8668,
      "step": 110
    },
    {
      "epoch": 2.888888888888889,
      "grad_norm": 0.4423123598098755,
      "learning_rate": 4.443568793224415e-05,
      "loss": 0.9419,
      "step": 115
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.7325336933135986,
      "learning_rate": 4.371276870427753e-05,
      "loss": 0.9379,
      "step": 120
    },
    {
      "epoch": 3.126984126984127,
      "grad_norm": 0.4649835526943207,
      "learning_rate": 4.295237883830685e-05,
      "loss": 0.8399,
      "step": 125
    },
    {
      "epoch": 3.253968253968254,
      "grad_norm": 0.49590322375297546,
      "learning_rate": 4.215604094671835e-05,
      "loss": 0.8626,
      "step": 130
    },
    {
      "epoch": 3.380952380952381,
      "grad_norm": 0.5896856188774109,
      "learning_rate": 4.132534962458962e-05,
      "loss": 0.8826,
      "step": 135
    },
    {
      "epoch": 3.507936507936508,
      "grad_norm": 0.4864223301410675,
      "learning_rate": 4.0461968256656376e-05,
      "loss": 0.8538,
      "step": 140
    },
    {
      "epoch": 3.634920634920635,
      "grad_norm": 0.5413251519203186,
      "learning_rate": 3.956762568653378e-05,
      "loss": 0.8788,
      "step": 145
    },
    {
      "epoch": 3.761904761904762,
      "grad_norm": 0.5263978838920593,
      "learning_rate": 3.8644112754862614e-05,
      "loss": 0.8279,
      "step": 150
    },
    {
      "epoch": 3.888888888888889,
      "grad_norm": 0.5732172131538391,
      "learning_rate": 3.76932787133117e-05,
      "loss": 0.7522,
      "step": 155
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.8840337991714478,
      "learning_rate": 3.6717027521617595e-05,
      "loss": 0.8394,
      "step": 160
    },
    {
      "epoch": 4.1269841269841265,
      "grad_norm": 0.5274865031242371,
      "learning_rate": 3.5717314035076355e-05,
      "loss": 0.8122,
      "step": 165
    },
    {
      "epoch": 4.253968253968254,
      "grad_norm": 0.57438725233078,
      "learning_rate": 3.4696140090121376e-05,
      "loss": 0.8541,
      "step": 170
    },
    {
      "epoch": 4.380952380952381,
      "grad_norm": 0.6474905610084534,
      "learning_rate": 3.365555049582582e-05,
      "loss": 0.7446,
      "step": 175
    },
    {
      "epoch": 4.507936507936508,
      "grad_norm": 0.6062578558921814,
      "learning_rate": 3.2597628939356175e-05,
      "loss": 0.7282,
      "step": 180
    },
    {
      "epoch": 4.634920634920634,
      "grad_norm": 0.7624045610427856,
      "learning_rate": 3.152449381357593e-05,
      "loss": 0.6846,
      "step": 185
    },
    {
      "epoch": 4.761904761904762,
      "grad_norm": 0.6446404457092285,
      "learning_rate": 3.0438293975154186e-05,
      "loss": 0.7597,
      "step": 190
    },
    {
      "epoch": 4.888888888888889,
      "grad_norm": 0.7280287146568298,
      "learning_rate": 2.9341204441673266e-05,
      "loss": 0.6788,
      "step": 195
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.213476538658142,
      "learning_rate": 2.8235422036351382e-05,
      "loss": 0.7306,
      "step": 200
    },
    {
      "epoch": 5.1269841269841265,
      "grad_norm": 0.6391934156417847,
      "learning_rate": 2.712316098910162e-05,
      "loss": 0.5925,
      "step": 205
    },
    {
      "epoch": 5.253968253968254,
      "grad_norm": 0.6667851209640503,
      "learning_rate": 2.600664850273538e-05,
      "loss": 0.6754,
      "step": 210
    },
    {
      "epoch": 5.380952380952381,
      "grad_norm": 0.8730446100234985,
      "learning_rate": 2.4888120293188916e-05,
      "loss": 0.7179,
      "step": 215
    },
    {
      "epoch": 5.507936507936508,
      "grad_norm": 0.8416940569877625,
      "learning_rate": 2.3769816112703047e-05,
      "loss": 0.6565,
      "step": 220
    },
    {
      "epoch": 5.634920634920634,
      "grad_norm": 0.7638925909996033,
      "learning_rate": 2.265397526492052e-05,
      "loss": 0.629,
      "step": 225
    },
    {
      "epoch": 5.761904761904762,
      "grad_norm": 0.8404481410980225,
      "learning_rate": 2.154283212088168e-05,
      "loss": 0.6779,
      "step": 230
    },
    {
      "epoch": 5.888888888888889,
      "grad_norm": 0.7644646167755127,
      "learning_rate": 2.043861164489719e-05,
      "loss": 0.6363,
      "step": 235
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.407417893409729,
      "learning_rate": 1.934352493925695e-05,
      "loss": 0.6873,
      "step": 240
    },
    {
      "epoch": 6.1269841269841265,
      "grad_norm": 0.8645577430725098,
      "learning_rate": 1.825976481669641e-05,
      "loss": 0.6057,
      "step": 245
    },
    {
      "epoch": 6.253968253968254,
      "grad_norm": 0.8925438523292542,
      "learning_rate": 1.7189501409486062e-05,
      "loss": 0.5787,
      "step": 250
    },
    {
      "epoch": 6.380952380952381,
      "grad_norm": 1.0913372039794922,
      "learning_rate": 1.613487782393661e-05,
      "loss": 0.5813,
      "step": 255
    },
    {
      "epoch": 6.507936507936508,
      "grad_norm": 1.0974910259246826,
      "learning_rate": 1.509800584902108e-05,
      "loss": 0.5534,
      "step": 260
    },
    {
      "epoch": 6.634920634920634,
      "grad_norm": 0.9944404363632202,
      "learning_rate": 1.4080961727707184e-05,
      "loss": 0.5976,
      "step": 265
    },
    {
      "epoch": 6.761904761904762,
      "grad_norm": 0.9145245552062988,
      "learning_rate": 1.3085781999467303e-05,
      "loss": 0.5592,
      "step": 270
    },
    {
      "epoch": 6.888888888888889,
      "grad_norm": 1.0390015840530396,
      "learning_rate": 1.2114459422291205e-05,
      "loss": 0.5811,
      "step": 275
    },
    {
      "epoch": 7.0,
      "grad_norm": 1.7172333002090454,
      "learning_rate": 1.116893898236716e-05,
      "loss": 0.6019,
      "step": 280
    },
    {
      "epoch": 7.1269841269841265,
      "grad_norm": 0.9733636975288391,
      "learning_rate": 1.0251113999421935e-05,
      "loss": 0.4985,
      "step": 285
    },
    {
      "epoch": 7.253968253968254,
      "grad_norm": 1.0347152948379517,
      "learning_rate": 9.362822335518063e-06,
      "loss": 0.5585,
      "step": 290
    },
    {
      "epoch": 7.380952380952381,
      "grad_norm": 0.8855388164520264,
      "learning_rate": 8.505842714900297e-06,
      "loss": 0.5163,
      "step": 295
    },
    {
      "epoch": 7.507936507936508,
      "grad_norm": 1.2762815952301025,
      "learning_rate": 7.681891162260015e-06,
      "loss": 0.5637,
      "step": 300
    },
    {
      "epoch": 7.634920634920634,
      "grad_norm": 1.1591953039169312,
      "learning_rate": 6.892617566550044e-06,
      "loss": 0.4971,
      "step": 305
    },
    {
      "epoch": 7.761904761904762,
      "grad_norm": 1.12722647190094,
      "learning_rate": 6.1396023772302465e-06,
      "loss": 0.5231,
      "step": 310
    },
    {
      "epoch": 7.888888888888889,
      "grad_norm": 1.0456628799438477,
      "learning_rate": 5.424353439559446e-06,
      "loss": 0.5106,
      "step": 315
    },
    {
      "epoch": 8.0,
      "grad_norm": 2.141434669494629,
      "learning_rate": 4.748302975270838e-06,
      "loss": 0.5518,
      "step": 320
    },
    {
      "epoch": 8.126984126984127,
      "grad_norm": 1.1136949062347412,
      "learning_rate": 4.112804714676594e-06,
      "loss": 0.5119,
      "step": 325
    },
    {
      "epoch": 8.253968253968253,
      "grad_norm": 1.1117428541183472,
      "learning_rate": 3.5191311859445796e-06,
      "loss": 0.484,
      "step": 330
    },
    {
      "epoch": 8.380952380952381,
      "grad_norm": 1.1094739437103271,
      "learning_rate": 2.9684711669750313e-06,
      "loss": 0.4984,
      "step": 335
    },
    {
      "epoch": 8.507936507936508,
      "grad_norm": 1.3133466243743896,
      "learning_rate": 2.4619273049796e-06,
      "loss": 0.5241,
      "step": 340
    },
    {
      "epoch": 8.634920634920634,
      "grad_norm": 1.1593372821807861,
      "learning_rate": 2.0005139085293945e-06,
      "loss": 0.5127,
      "step": 345
    },
    {
      "epoch": 8.761904761904763,
      "grad_norm": 1.176182746887207,
      "learning_rate": 1.5851549164932116e-06,
      "loss": 0.4987,
      "step": 350
    },
    {
      "epoch": 8.88888888888889,
      "grad_norm": 1.2116988897323608,
      "learning_rate": 1.2166820479329572e-06,
      "loss": 0.4652,
      "step": 355
    },
    {
      "epoch": 9.0,
      "grad_norm": 1.6440435647964478,
      "learning_rate": 8.958331366609423e-07,
      "loss": 0.4911,
      "step": 360
    },
    {
      "epoch": 9.126984126984127,
      "grad_norm": 1.0842829942703247,
      "learning_rate": 6.232506537939941e-07,
      "loss": 0.4461,
      "step": 365
    },
    {
      "epoch": 9.253968253968253,
      "grad_norm": 1.2127434015274048,
      "learning_rate": 3.994804212627462e-07,
      "loss": 0.5129,
      "step": 370
    },
    {
      "epoch": 9.380952380952381,
      "grad_norm": 1.1075149774551392,
      "learning_rate": 2.2497051885228827e-07,
      "loss": 0.4841,
      "step": 375
    },
    {
      "epoch": 9.507936507936508,
      "grad_norm": 1.1152573823928833,
      "learning_rate": 1.0007038696262516e-07,
      "loss": 0.4762,
      "step": 380
    },
    {
      "epoch": 9.634920634920634,
      "grad_norm": 1.2555053234100342,
      "learning_rate": 2.5030126885694506e-08,
      "loss": 0.4963,
      "step": 385
    },
    {
      "epoch": 9.761904761904763,
      "grad_norm": 1.1572444438934326,
      "learning_rate": 0.0,
      "loss": 0.4631,
      "step": 390
    },
    {
      "epoch": 9.761904761904763,
      "step": 390,
      "total_flos": 3.320475505655808e+16,
      "train_loss": 1.0253899280841534,
      "train_runtime": 4620.778,
      "train_samples_per_second": 2.727,
      "train_steps_per_second": 0.084
    }
  ],
  "logging_steps": 5,
  "max_steps": 390,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 39,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.320475505655808e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}