| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 1552, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0032232070910556, | |
| "grad_norm": 70.69935607910156, | |
| "learning_rate": 1.6077170418006432e-07, | |
| "loss": 0.9483, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0064464141821112, | |
| "grad_norm": 41.78139877319336, | |
| "learning_rate": 3.2154340836012864e-07, | |
| "loss": 0.8601, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.009669621273166801, | |
| "grad_norm": 39.28358840942383, | |
| "learning_rate": 4.823151125401929e-07, | |
| "loss": 0.7662, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0128928283642224, | |
| "grad_norm": 26.412134170532227, | |
| "learning_rate": 6.430868167202573e-07, | |
| "loss": 0.5686, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.016116035455278, | |
| "grad_norm": 12.356087684631348, | |
| "learning_rate": 8.038585209003216e-07, | |
| "loss": 0.4011, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.019339242546333603, | |
| "grad_norm": 7.162217617034912, | |
| "learning_rate": 9.646302250803859e-07, | |
| "loss": 0.3031, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.022562449637389202, | |
| "grad_norm": 5.508477687835693, | |
| "learning_rate": 1.1254019292604503e-06, | |
| "loss": 0.245, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0257856567284448, | |
| "grad_norm": 6.486464500427246, | |
| "learning_rate": 1.2861736334405146e-06, | |
| "loss": 0.2266, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.029008863819500404, | |
| "grad_norm": 5.0619378089904785, | |
| "learning_rate": 1.4469453376205788e-06, | |
| "loss": 0.1895, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.032232070910556, | |
| "grad_norm": 3.8913064002990723, | |
| "learning_rate": 1.6077170418006432e-06, | |
| "loss": 0.1845, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.035455278001611606, | |
| "grad_norm": 3.2244060039520264, | |
| "learning_rate": 1.7684887459807077e-06, | |
| "loss": 0.1619, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.038678485092667206, | |
| "grad_norm": 4.426245212554932, | |
| "learning_rate": 1.9292604501607717e-06, | |
| "loss": 0.1638, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.041901692183722805, | |
| "grad_norm": 4.924641132354736, | |
| "learning_rate": 2.090032154340836e-06, | |
| "loss": 0.1535, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.045124899274778404, | |
| "grad_norm": 4.1020894050598145, | |
| "learning_rate": 2.2508038585209006e-06, | |
| "loss": 0.1514, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.048348106365834004, | |
| "grad_norm": 3.604417562484741, | |
| "learning_rate": 2.411575562700965e-06, | |
| "loss": 0.136, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0515713134568896, | |
| "grad_norm": 3.021495819091797, | |
| "learning_rate": 2.572347266881029e-06, | |
| "loss": 0.1329, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0547945205479452, | |
| "grad_norm": 3.2783167362213135, | |
| "learning_rate": 2.7331189710610936e-06, | |
| "loss": 0.142, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.05801772763900081, | |
| "grad_norm": 2.818120241165161, | |
| "learning_rate": 2.8938906752411576e-06, | |
| "loss": 0.1305, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.06124093473005641, | |
| "grad_norm": 3.0875892639160156, | |
| "learning_rate": 3.054662379421222e-06, | |
| "loss": 0.123, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.064464141821112, | |
| "grad_norm": 2.726335048675537, | |
| "learning_rate": 3.2154340836012865e-06, | |
| "loss": 0.139, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06768734891216761, | |
| "grad_norm": 2.3112688064575195, | |
| "learning_rate": 3.376205787781351e-06, | |
| "loss": 0.1174, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.07091055600322321, | |
| "grad_norm": 2.9780499935150146, | |
| "learning_rate": 3.5369774919614154e-06, | |
| "loss": 0.1342, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.07413376309427881, | |
| "grad_norm": 3.2706351280212402, | |
| "learning_rate": 3.69774919614148e-06, | |
| "loss": 0.1225, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.07735697018533441, | |
| "grad_norm": 2.3604736328125, | |
| "learning_rate": 3.8585209003215434e-06, | |
| "loss": 0.1298, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.08058017727639001, | |
| "grad_norm": 2.7637319564819336, | |
| "learning_rate": 4.0192926045016075e-06, | |
| "loss": 0.1237, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.08380338436744561, | |
| "grad_norm": 2.42730975151062, | |
| "learning_rate": 4.180064308681672e-06, | |
| "loss": 0.1321, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.08702659145850121, | |
| "grad_norm": 2.5871758460998535, | |
| "learning_rate": 4.340836012861736e-06, | |
| "loss": 0.13, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.09024979854955681, | |
| "grad_norm": 2.662217855453491, | |
| "learning_rate": 4.501607717041801e-06, | |
| "loss": 0.1308, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09347300564061241, | |
| "grad_norm": 2.97275447845459, | |
| "learning_rate": 4.662379421221865e-06, | |
| "loss": 0.1152, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.09669621273166801, | |
| "grad_norm": 2.990713119506836, | |
| "learning_rate": 4.82315112540193e-06, | |
| "loss": 0.1241, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0999194198227236, | |
| "grad_norm": 3.011382818222046, | |
| "learning_rate": 4.983922829581994e-06, | |
| "loss": 0.1318, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1031426269137792, | |
| "grad_norm": 2.340388774871826, | |
| "learning_rate": 5.144694533762058e-06, | |
| "loss": 0.1217, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1063658340048348, | |
| "grad_norm": 2.1309595108032227, | |
| "learning_rate": 5.305466237942123e-06, | |
| "loss": 0.1139, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1095890410958904, | |
| "grad_norm": 2.023085355758667, | |
| "learning_rate": 5.466237942122187e-06, | |
| "loss": 0.117, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.11281224818694602, | |
| "grad_norm": 2.348984718322754, | |
| "learning_rate": 5.627009646302252e-06, | |
| "loss": 0.1141, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.11603545527800162, | |
| "grad_norm": 2.375622272491455, | |
| "learning_rate": 5.787781350482315e-06, | |
| "loss": 0.128, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.11925866236905722, | |
| "grad_norm": 2.196361780166626, | |
| "learning_rate": 5.94855305466238e-06, | |
| "loss": 0.1196, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.12248186946011282, | |
| "grad_norm": 2.2878026962280273, | |
| "learning_rate": 6.109324758842444e-06, | |
| "loss": 0.1244, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.12570507655116842, | |
| "grad_norm": 2.213945150375366, | |
| "learning_rate": 6.270096463022508e-06, | |
| "loss": 0.1247, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.128928283642224, | |
| "grad_norm": 2.1751949787139893, | |
| "learning_rate": 6.430868167202573e-06, | |
| "loss": 0.1218, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.13215149073327961, | |
| "grad_norm": 3.6750779151916504, | |
| "learning_rate": 6.591639871382637e-06, | |
| "loss": 0.1245, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.13537469782433523, | |
| "grad_norm": 2.0573132038116455, | |
| "learning_rate": 6.752411575562702e-06, | |
| "loss": 0.1222, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1385979049153908, | |
| "grad_norm": 2.161153554916382, | |
| "learning_rate": 6.913183279742766e-06, | |
| "loss": 0.1144, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.14182111200644643, | |
| "grad_norm": 2.023874282836914, | |
| "learning_rate": 7.073954983922831e-06, | |
| "loss": 0.1196, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.145044319097502, | |
| "grad_norm": 2.5854148864746094, | |
| "learning_rate": 7.234726688102894e-06, | |
| "loss": 0.1226, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.14826752618855762, | |
| "grad_norm": 2.1432173252105713, | |
| "learning_rate": 7.39549839228296e-06, | |
| "loss": 0.1141, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1514907332796132, | |
| "grad_norm": 1.9795219898223877, | |
| "learning_rate": 7.556270096463023e-06, | |
| "loss": 0.1214, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.15471394037066882, | |
| "grad_norm": 2.685298204421997, | |
| "learning_rate": 7.717041800643087e-06, | |
| "loss": 0.1166, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1579371474617244, | |
| "grad_norm": 2.327439069747925, | |
| "learning_rate": 7.877813504823153e-06, | |
| "loss": 0.1147, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.16116035455278002, | |
| "grad_norm": 2.873242139816284, | |
| "learning_rate": 8.038585209003215e-06, | |
| "loss": 0.1214, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1643835616438356, | |
| "grad_norm": 1.947021484375, | |
| "learning_rate": 8.19935691318328e-06, | |
| "loss": 0.1181, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.16760676873489122, | |
| "grad_norm": 2.10578989982605, | |
| "learning_rate": 8.360128617363345e-06, | |
| "loss": 0.1166, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.1708299758259468, | |
| "grad_norm": 3.1242830753326416, | |
| "learning_rate": 8.520900321543409e-06, | |
| "loss": 0.1223, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.17405318291700242, | |
| "grad_norm": 2.3070671558380127, | |
| "learning_rate": 8.681672025723473e-06, | |
| "loss": 0.1199, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.177276390008058, | |
| "grad_norm": 4.113283157348633, | |
| "learning_rate": 8.842443729903538e-06, | |
| "loss": 0.1359, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.18049959709911362, | |
| "grad_norm": 2.406996488571167, | |
| "learning_rate": 9.003215434083602e-06, | |
| "loss": 0.1247, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.18372280419016923, | |
| "grad_norm": 1.8795942068099976, | |
| "learning_rate": 9.163987138263667e-06, | |
| "loss": 0.1184, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.18694601128122482, | |
| "grad_norm": 1.873802900314331, | |
| "learning_rate": 9.32475884244373e-06, | |
| "loss": 0.1147, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.19016921837228043, | |
| "grad_norm": 2.1450552940368652, | |
| "learning_rate": 9.485530546623795e-06, | |
| "loss": 0.1297, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.19339242546333602, | |
| "grad_norm": 2.236771583557129, | |
| "learning_rate": 9.64630225080386e-06, | |
| "loss": 0.1144, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.19661563255439163, | |
| "grad_norm": 2.0439035892486572, | |
| "learning_rate": 9.807073954983923e-06, | |
| "loss": 0.1153, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.1998388396454472, | |
| "grad_norm": 2.3851237297058105, | |
| "learning_rate": 9.967845659163988e-06, | |
| "loss": 0.1185, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.20306204673650283, | |
| "grad_norm": 1.9924075603485107, | |
| "learning_rate": 9.99994931968214e-06, | |
| "loss": 0.1094, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.2062852538275584, | |
| "grad_norm": 2.0796937942504883, | |
| "learning_rate": 9.999743432651652e-06, | |
| "loss": 0.1211, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.20950846091861403, | |
| "grad_norm": 2.6404528617858887, | |
| "learning_rate": 9.999379177905158e-06, | |
| "loss": 0.1225, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2127316680096696, | |
| "grad_norm": 1.8474249839782715, | |
| "learning_rate": 9.998856566980493e-06, | |
| "loss": 0.1195, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.21595487510072522, | |
| "grad_norm": 2.2194368839263916, | |
| "learning_rate": 9.998175616431443e-06, | |
| "loss": 0.1234, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.2191780821917808, | |
| "grad_norm": 2.3846206665039062, | |
| "learning_rate": 9.99733634782723e-06, | |
| "loss": 0.1183, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.22240128928283642, | |
| "grad_norm": 2.1238505840301514, | |
| "learning_rate": 9.996338787751834e-06, | |
| "loss": 0.1133, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.22562449637389204, | |
| "grad_norm": 1.9952363967895508, | |
| "learning_rate": 9.995182967803131e-06, | |
| "loss": 0.1171, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.22884770346494762, | |
| "grad_norm": 1.8000890016555786, | |
| "learning_rate": 9.99386892459192e-06, | |
| "loss": 0.1271, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.23207091055600323, | |
| "grad_norm": 1.680237889289856, | |
| "learning_rate": 9.992396699740738e-06, | |
| "loss": 0.1233, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 1.9006551504135132, | |
| "learning_rate": 9.990766339882554e-06, | |
| "loss": 0.117, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.23851732473811443, | |
| "grad_norm": 2.417584180831909, | |
| "learning_rate": 9.988977896659294e-06, | |
| "loss": 0.1202, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.24174053182917002, | |
| "grad_norm": 1.7894331216812134, | |
| "learning_rate": 9.987031426720195e-06, | |
| "loss": 0.1131, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.24496373892022563, | |
| "grad_norm": 2.3755767345428467, | |
| "learning_rate": 9.984926991720025e-06, | |
| "loss": 0.1199, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.24818694601128122, | |
| "grad_norm": 1.6282379627227783, | |
| "learning_rate": 9.982664658317115e-06, | |
| "loss": 0.1139, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.25141015310233683, | |
| "grad_norm": 2.3639254570007324, | |
| "learning_rate": 9.980244498171256e-06, | |
| "loss": 0.1064, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.25463336019339244, | |
| "grad_norm": 1.6792004108428955, | |
| "learning_rate": 9.97766658794143e-06, | |
| "loss": 0.111, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.257856567284448, | |
| "grad_norm": 2.633594036102295, | |
| "learning_rate": 9.974931009283378e-06, | |
| "loss": 0.1083, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2610797743755036, | |
| "grad_norm": 1.7790218591690063, | |
| "learning_rate": 9.972037848847014e-06, | |
| "loss": 0.1197, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.26430298146655923, | |
| "grad_norm": 2.443464756011963, | |
| "learning_rate": 9.968987198273682e-06, | |
| "loss": 0.1153, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.26752618855761484, | |
| "grad_norm": 1.6622118949890137, | |
| "learning_rate": 9.965779154193256e-06, | |
| "loss": 0.1214, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.27074939564867045, | |
| "grad_norm": 1.398253321647644, | |
| "learning_rate": 9.962413818221071e-06, | |
| "loss": 0.1053, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.273972602739726, | |
| "grad_norm": 2.2201757431030273, | |
| "learning_rate": 9.95889129695471e-06, | |
| "loss": 0.1206, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2771958098307816, | |
| "grad_norm": 2.262932062149048, | |
| "learning_rate": 9.955211701970631e-06, | |
| "loss": 0.1152, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.28041901692183724, | |
| "grad_norm": 1.72652268409729, | |
| "learning_rate": 9.951375149820624e-06, | |
| "loss": 0.114, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.28364222401289285, | |
| "grad_norm": 2.2013440132141113, | |
| "learning_rate": 9.947381762028124e-06, | |
| "loss": 0.1172, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2868654311039484, | |
| "grad_norm": 1.7692981958389282, | |
| "learning_rate": 9.943231665084363e-06, | |
| "loss": 0.108, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.290088638195004, | |
| "grad_norm": 2.222728729248047, | |
| "learning_rate": 9.938924990444363e-06, | |
| "loss": 0.1074, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.29331184528605964, | |
| "grad_norm": 1.67208993434906, | |
| "learning_rate": 9.934461874522767e-06, | |
| "loss": 0.1176, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.29653505237711525, | |
| "grad_norm": 1.7946748733520508, | |
| "learning_rate": 9.929842458689524e-06, | |
| "loss": 0.111, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2997582594681708, | |
| "grad_norm": 1.9234955310821533, | |
| "learning_rate": 9.925066889265412e-06, | |
| "loss": 0.1182, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3029814665592264, | |
| "grad_norm": 2.4538662433624268, | |
| "learning_rate": 9.920135317517393e-06, | |
| "loss": 0.1227, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.30620467365028203, | |
| "grad_norm": 1.8412810564041138, | |
| "learning_rate": 9.915047899653838e-06, | |
| "loss": 0.1128, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.30942788074133765, | |
| "grad_norm": 1.9068711996078491, | |
| "learning_rate": 9.909804796819562e-06, | |
| "loss": 0.1142, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3126510878323932, | |
| "grad_norm": 1.9735403060913086, | |
| "learning_rate": 9.904406175090732e-06, | |
| "loss": 0.1066, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.3158742949234488, | |
| "grad_norm": 2.349578619003296, | |
| "learning_rate": 9.898852205469603e-06, | |
| "loss": 0.1187, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.31909750201450443, | |
| "grad_norm": 1.504022479057312, | |
| "learning_rate": 9.893143063879098e-06, | |
| "loss": 0.1051, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.32232070910556004, | |
| "grad_norm": 1.5406743288040161, | |
| "learning_rate": 9.887278931157237e-06, | |
| "loss": 0.1123, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.32554391619661566, | |
| "grad_norm": 1.8361977338790894, | |
| "learning_rate": 9.881259993051415e-06, | |
| "loss": 0.1225, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.3287671232876712, | |
| "grad_norm": 1.7701557874679565, | |
| "learning_rate": 9.875086440212511e-06, | |
| "loss": 0.1027, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.33199033037872683, | |
| "grad_norm": 1.6567251682281494, | |
| "learning_rate": 9.86875846818885e-06, | |
| "loss": 0.1206, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.33521353746978244, | |
| "grad_norm": 1.641015887260437, | |
| "learning_rate": 9.862276277420016e-06, | |
| "loss": 0.1183, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.33843674456083805, | |
| "grad_norm": 1.2547988891601562, | |
| "learning_rate": 9.85564007323049e-06, | |
| "loss": 0.1098, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.3416599516518936, | |
| "grad_norm": 1.8411493301391602, | |
| "learning_rate": 9.848850065823159e-06, | |
| "loss": 0.1052, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3448831587429492, | |
| "grad_norm": 1.6275116205215454, | |
| "learning_rate": 9.841906470272655e-06, | |
| "loss": 0.1224, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.34810636583400484, | |
| "grad_norm": 2.855224847793579, | |
| "learning_rate": 9.834809506518537e-06, | |
| "loss": 0.1082, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.35132957292506045, | |
| "grad_norm": 2.3182358741760254, | |
| "learning_rate": 9.827559399358327e-06, | |
| "loss": 0.1224, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.354552780016116, | |
| "grad_norm": 2.1670444011688232, | |
| "learning_rate": 9.82015637844039e-06, | |
| "loss": 0.1101, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3577759871071716, | |
| "grad_norm": 1.629321813583374, | |
| "learning_rate": 9.812600678256664e-06, | |
| "loss": 0.1054, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.36099919419822724, | |
| "grad_norm": 1.6911214590072632, | |
| "learning_rate": 9.804892538135225e-06, | |
| "loss": 0.1028, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.36422240128928285, | |
| "grad_norm": 2.162142038345337, | |
| "learning_rate": 9.797032202232708e-06, | |
| "loss": 0.1052, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.36744560838033846, | |
| "grad_norm": 1.6863000392913818, | |
| "learning_rate": 9.789019919526583e-06, | |
| "loss": 0.1078, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.370668815471394, | |
| "grad_norm": 2.0217230319976807, | |
| "learning_rate": 9.780855943807253e-06, | |
| "loss": 0.1152, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.37389202256244963, | |
| "grad_norm": 1.3627716302871704, | |
| "learning_rate": 9.772540533670023e-06, | |
| "loss": 0.1055, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.37711522965350525, | |
| "grad_norm": 1.4767628908157349, | |
| "learning_rate": 9.764073952506913e-06, | |
| "loss": 0.1126, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.38033843674456086, | |
| "grad_norm": 1.769196629524231, | |
| "learning_rate": 9.755456468498307e-06, | |
| "loss": 0.1062, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3835616438356164, | |
| "grad_norm": 1.9426604509353638, | |
| "learning_rate": 9.746688354604467e-06, | |
| "loss": 0.1128, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.38678485092667203, | |
| "grad_norm": 1.6949142217636108, | |
| "learning_rate": 9.737769888556874e-06, | |
| "loss": 0.1058, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.39000805801772764, | |
| "grad_norm": 1.6036336421966553, | |
| "learning_rate": 9.728701352849445e-06, | |
| "loss": 0.1214, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.39323126510878326, | |
| "grad_norm": 1.3234189748764038, | |
| "learning_rate": 9.71948303472958e-06, | |
| "loss": 0.1095, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3964544721998388, | |
| "grad_norm": 1.5805995464324951, | |
| "learning_rate": 9.710115226189054e-06, | |
| "loss": 0.1179, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.3996776792908944, | |
| "grad_norm": 1.5236024856567383, | |
| "learning_rate": 9.700598223954787e-06, | |
| "loss": 0.1065, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.40290088638195004, | |
| "grad_norm": 2.2143023014068604, | |
| "learning_rate": 9.690932329479425e-06, | |
| "loss": 0.1118, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.40612409347300565, | |
| "grad_norm": 2.0677402019500732, | |
| "learning_rate": 9.681117848931806e-06, | |
| "loss": 0.1015, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.40934730056406127, | |
| "grad_norm": 1.786145567893982, | |
| "learning_rate": 9.671155093187256e-06, | |
| "loss": 0.1072, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.4125705076551168, | |
| "grad_norm": 1.661035418510437, | |
| "learning_rate": 9.661044377817745e-06, | |
| "loss": 0.1165, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.41579371474617244, | |
| "grad_norm": 1.7452033758163452, | |
| "learning_rate": 9.650786023081882e-06, | |
| "loss": 0.1107, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.41901692183722805, | |
| "grad_norm": 1.5127534866333008, | |
| "learning_rate": 9.640380353914784e-06, | |
| "loss": 0.1205, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.42224012892828366, | |
| "grad_norm": 1.7860335111618042, | |
| "learning_rate": 9.629827699917777e-06, | |
| "loss": 0.1099, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.4254633360193392, | |
| "grad_norm": 1.8366566896438599, | |
| "learning_rate": 9.619128395347957e-06, | |
| "loss": 0.0995, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.42868654311039484, | |
| "grad_norm": 1.7406480312347412, | |
| "learning_rate": 9.608282779107596e-06, | |
| "loss": 0.1093, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.43190975020145045, | |
| "grad_norm": 1.5550240278244019, | |
| "learning_rate": 9.597291194733417e-06, | |
| "loss": 0.1081, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.43513295729250606, | |
| "grad_norm": 1.8106791973114014, | |
| "learning_rate": 9.58615399038571e-06, | |
| "loss": 0.1092, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.4383561643835616, | |
| "grad_norm": 2.0512306690216064, | |
| "learning_rate": 9.574871518837299e-06, | |
| "loss": 0.11, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.44157937147461723, | |
| "grad_norm": 1.536855697631836, | |
| "learning_rate": 9.563444137462373e-06, | |
| "loss": 0.1092, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.44480257856567285, | |
| "grad_norm": 1.4719635248184204, | |
| "learning_rate": 9.55187220822516e-06, | |
| "loss": 0.1081, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.44802578565672846, | |
| "grad_norm": 1.7767363786697388, | |
| "learning_rate": 9.54015609766847e-06, | |
| "loss": 0.1099, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.4512489927477841, | |
| "grad_norm": 1.453895092010498, | |
| "learning_rate": 9.528296176902085e-06, | |
| "loss": 0.1083, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.45447219983883963, | |
| "grad_norm": 1.516648292541504, | |
| "learning_rate": 9.51629282159099e-06, | |
| "loss": 0.1174, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.45769540692989524, | |
| "grad_norm": 1.5289475917816162, | |
| "learning_rate": 9.504146411943488e-06, | |
| "loss": 0.1119, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.46091861402095086, | |
| "grad_norm": 1.7268835306167603, | |
| "learning_rate": 9.491857332699153e-06, | |
| "loss": 0.1067, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.46414182111200647, | |
| "grad_norm": 1.424131989479065, | |
| "learning_rate": 9.47942597311664e-06, | |
| "loss": 0.1096, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.467365028203062, | |
| "grad_norm": 2.6142001152038574, | |
| "learning_rate": 9.466852726961363e-06, | |
| "loss": 0.1132, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 1.7743583917617798, | |
| "learning_rate": 9.454137992493008e-06, | |
| "loss": 0.1095, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.47381144238517325, | |
| "grad_norm": 1.3648674488067627, | |
| "learning_rate": 9.441282172452935e-06, | |
| "loss": 0.1016, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.47703464947622887, | |
| "grad_norm": 1.202217698097229, | |
| "learning_rate": 9.428285674051413e-06, | |
| "loss": 0.1014, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.4802578565672844, | |
| "grad_norm": 1.2294992208480835, | |
| "learning_rate": 9.415148908954717e-06, | |
| "loss": 0.0958, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.48348106365834004, | |
| "grad_norm": 1.3715941905975342, | |
| "learning_rate": 9.401872293272096e-06, | |
| "loss": 0.1032, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.48670427074939565, | |
| "grad_norm": 1.2639530897140503, | |
| "learning_rate": 9.38845624754259e-06, | |
| "loss": 0.1047, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.48992747784045126, | |
| "grad_norm": 1.389994502067566, | |
| "learning_rate": 9.37490119672171e-06, | |
| "loss": 0.1072, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4931506849315068, | |
| "grad_norm": 1.6051830053329468, | |
| "learning_rate": 9.361207570167974e-06, | |
| "loss": 0.1021, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.49637389202256244, | |
| "grad_norm": 2.006974458694458, | |
| "learning_rate": 9.347375801629313e-06, | |
| "loss": 0.1038, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.49959709911361805, | |
| "grad_norm": 1.40548837184906, | |
| "learning_rate": 9.333406329229326e-06, | |
| "loss": 0.1064, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5028203062046737, | |
| "grad_norm": 1.4568746089935303, | |
| "learning_rate": 9.319299595453404e-06, | |
| "loss": 0.1109, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5060435132957293, | |
| "grad_norm": 1.7389963865280151, | |
| "learning_rate": 9.305056047134722e-06, | |
| "loss": 0.1082, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.5092667203867849, | |
| "grad_norm": 1.2637214660644531, | |
| "learning_rate": 9.29067613544007e-06, | |
| "loss": 0.1019, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5124899274778405, | |
| "grad_norm": 1.7853842973709106, | |
| "learning_rate": 9.276160315855576e-06, | |
| "loss": 0.101, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.515713134568896, | |
| "grad_norm": 1.365021824836731, | |
| "learning_rate": 9.261509048172272e-06, | |
| "loss": 0.0903, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5189363416599516, | |
| "grad_norm": 1.0780879259109497, | |
| "learning_rate": 9.246722796471534e-06, | |
| "loss": 0.1003, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.5221595487510072, | |
| "grad_norm": 1.3499747514724731, | |
| "learning_rate": 9.231802029110373e-06, | |
| "loss": 0.108, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.5253827558420628, | |
| "grad_norm": 1.2328459024429321, | |
| "learning_rate": 9.216747218706612e-06, | |
| "loss": 0.1086, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.5286059629331185, | |
| "grad_norm": 1.665556788444519, | |
| "learning_rate": 9.20155884212391e-06, | |
| "loss": 0.0989, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5318291700241741, | |
| "grad_norm": 1.262510061264038, | |
| "learning_rate": 9.186237380456652e-06, | |
| "loss": 0.1087, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5350523771152297, | |
| "grad_norm": 1.2561684846878052, | |
| "learning_rate": 9.170783319014723e-06, | |
| "loss": 0.1011, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5382755842062853, | |
| "grad_norm": 1.4100691080093384, | |
| "learning_rate": 9.155197147308118e-06, | |
| "loss": 0.1025, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5414987912973409, | |
| "grad_norm": 1.4246737957000732, | |
| "learning_rate": 9.13947935903146e-06, | |
| "loss": 0.1043, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5447219983883964, | |
| "grad_norm": 1.5978039503097534, | |
| "learning_rate": 9.12363045204834e-06, | |
| "loss": 0.1137, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.547945205479452, | |
| "grad_norm": 1.3265248537063599, | |
| "learning_rate": 9.107650928375555e-06, | |
| "loss": 0.1066, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5511684125705076, | |
| "grad_norm": 1.3155473470687866, | |
| "learning_rate": 9.091541294167214e-06, | |
| "loss": 0.0958, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.5543916196615633, | |
| "grad_norm": 1.4708001613616943, | |
| "learning_rate": 9.075302059698696e-06, | |
| "loss": 0.1063, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5576148267526189, | |
| "grad_norm": 1.3062944412231445, | |
| "learning_rate": 9.05893373935049e-06, | |
| "loss": 0.1009, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.5608380338436745, | |
| "grad_norm": 1.3801825046539307, | |
| "learning_rate": 9.0424368515919e-06, | |
| "loss": 0.1042, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5640612409347301, | |
| "grad_norm": 1.2434556484222412, | |
| "learning_rate": 9.02581191896463e-06, | |
| "loss": 0.1027, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.5672844480257857, | |
| "grad_norm": 1.4129581451416016, | |
| "learning_rate": 9.00905946806622e-06, | |
| "loss": 0.1028, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5705076551168412, | |
| "grad_norm": 1.7679858207702637, | |
| "learning_rate": 8.992180029533378e-06, | |
| "loss": 0.1044, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5737308622078968, | |
| "grad_norm": 1.4008570909500122, | |
| "learning_rate": 8.975174138025165e-06, | |
| "loss": 0.0988, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5769540692989524, | |
| "grad_norm": 1.1371465921401978, | |
| "learning_rate": 8.958042332206059e-06, | |
| "loss": 0.0977, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.580177276390008, | |
| "grad_norm": 1.4827523231506348, | |
| "learning_rate": 8.940785154728899e-06, | |
| "loss": 0.097, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5834004834810637, | |
| "grad_norm": 1.5484329462051392, | |
| "learning_rate": 8.92340315221769e-06, | |
| "loss": 0.1049, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.5866236905721193, | |
| "grad_norm": 1.267395257949829, | |
| "learning_rate": 8.905896875250291e-06, | |
| "loss": 0.0943, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5898468976631749, | |
| "grad_norm": 2.0672824382781982, | |
| "learning_rate": 8.888266878340979e-06, | |
| "loss": 0.0984, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.5930701047542305, | |
| "grad_norm": 1.3846348524093628, | |
| "learning_rate": 8.870513719922873e-06, | |
| "loss": 0.1047, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5962933118452861, | |
| "grad_norm": 1.162631630897522, | |
| "learning_rate": 8.85263796233026e-06, | |
| "loss": 0.1067, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.5995165189363416, | |
| "grad_norm": 1.2754584550857544, | |
| "learning_rate": 8.834640171780777e-06, | |
| "loss": 0.0959, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.6027397260273972, | |
| "grad_norm": 1.383272409439087, | |
| "learning_rate": 8.816520918357473e-06, | |
| "loss": 0.1063, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.6059629331184528, | |
| "grad_norm": 1.6662758588790894, | |
| "learning_rate": 8.798280775990751e-06, | |
| "loss": 0.1024, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.6091861402095085, | |
| "grad_norm": 1.9114925861358643, | |
| "learning_rate": 8.7799203224402e-06, | |
| "loss": 0.1054, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.6124093473005641, | |
| "grad_norm": 1.4166219234466553, | |
| "learning_rate": 8.761440139276279e-06, | |
| "loss": 0.1077, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6156325543916197, | |
| "grad_norm": 1.9428759813308716, | |
| "learning_rate": 8.742840811861901e-06, | |
| "loss": 0.1044, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.6188557614826753, | |
| "grad_norm": 1.5213385820388794, | |
| "learning_rate": 8.724122929333904e-06, | |
| "loss": 0.1128, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6220789685737309, | |
| "grad_norm": 1.2538460493087769, | |
| "learning_rate": 8.705287084584369e-06, | |
| "loss": 0.0963, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.6253021756647864, | |
| "grad_norm": 0.9515339136123657, | |
| "learning_rate": 8.68633387424185e-06, | |
| "loss": 0.104, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.628525382755842, | |
| "grad_norm": 1.6763734817504883, | |
| "learning_rate": 8.667263898652485e-06, | |
| "loss": 0.0975, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.6317485898468976, | |
| "grad_norm": 1.9303100109100342, | |
| "learning_rate": 8.648077761860962e-06, | |
| "loss": 0.0936, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.6349717969379532, | |
| "grad_norm": 1.3960262537002563, | |
| "learning_rate": 8.6287760715914e-06, | |
| "loss": 0.1018, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.6381950040290089, | |
| "grad_norm": 1.3512085676193237, | |
| "learning_rate": 8.609359439228092e-06, | |
| "loss": 0.1051, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6414182111200645, | |
| "grad_norm": 1.3363274335861206, | |
| "learning_rate": 8.589828479796138e-06, | |
| "loss": 0.1026, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.6446414182111201, | |
| "grad_norm": 1.2143882513046265, | |
| "learning_rate": 8.570183811941973e-06, | |
| "loss": 0.0997, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6478646253021757, | |
| "grad_norm": 1.5231553316116333, | |
| "learning_rate": 8.550426057913758e-06, | |
| "loss": 0.0971, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.6510878323932313, | |
| "grad_norm": 1.5069528818130493, | |
| "learning_rate": 8.53055584354169e-06, | |
| "loss": 0.0968, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.6543110394842868, | |
| "grad_norm": 1.9453926086425781, | |
| "learning_rate": 8.510573798218153e-06, | |
| "loss": 0.1056, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.6575342465753424, | |
| "grad_norm": 1.6074435710906982, | |
| "learning_rate": 8.490480554877804e-06, | |
| "loss": 0.1005, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.660757453666398, | |
| "grad_norm": 1.4784128665924072, | |
| "learning_rate": 8.47027674997751e-06, | |
| "loss": 0.091, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.6639806607574537, | |
| "grad_norm": 1.3281731605529785, | |
| "learning_rate": 8.449963023476198e-06, | |
| "loss": 0.1007, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.6672038678485093, | |
| "grad_norm": 1.3868046998977661, | |
| "learning_rate": 8.429540018814581e-06, | |
| "loss": 0.1023, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.6704270749395649, | |
| "grad_norm": 1.4011777639389038, | |
| "learning_rate": 8.409008382894771e-06, | |
| "loss": 0.0972, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.6736502820306205, | |
| "grad_norm": 1.2864456176757812, | |
| "learning_rate": 8.388368766059798e-06, | |
| "loss": 0.1024, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.6768734891216761, | |
| "grad_norm": 1.8163318634033203, | |
| "learning_rate": 8.367621822073004e-06, | |
| "loss": 0.0942, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6800966962127317, | |
| "grad_norm": 1.1266424655914307, | |
| "learning_rate": 8.346768208097339e-06, | |
| "loss": 0.0997, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.6833199033037872, | |
| "grad_norm": 1.268912672996521, | |
| "learning_rate": 8.325808584674539e-06, | |
| "loss": 0.0954, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6865431103948428, | |
| "grad_norm": 1.9696354866027832, | |
| "learning_rate": 8.304743615704207e-06, | |
| "loss": 0.1056, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.6897663174858985, | |
| "grad_norm": 1.47492253780365, | |
| "learning_rate": 8.283573968422792e-06, | |
| "loss": 0.103, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6929895245769541, | |
| "grad_norm": 1.654740810394287, | |
| "learning_rate": 8.262300313382431e-06, | |
| "loss": 0.0951, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.6962127316680097, | |
| "grad_norm": 1.3860782384872437, | |
| "learning_rate": 8.240923324429742e-06, | |
| "loss": 0.1013, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6994359387590653, | |
| "grad_norm": 1.9896957874298096, | |
| "learning_rate": 8.219443678684448e-06, | |
| "loss": 0.095, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.7026591458501209, | |
| "grad_norm": 1.5903962850570679, | |
| "learning_rate": 8.197862056517954e-06, | |
| "loss": 0.1025, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 1.1547088623046875, | |
| "learning_rate": 8.176179141531774e-06, | |
| "loss": 0.1011, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.709105560032232, | |
| "grad_norm": 1.4623602628707886, | |
| "learning_rate": 8.154395620535899e-06, | |
| "loss": 0.1015, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7123287671232876, | |
| "grad_norm": 1.5208735466003418, | |
| "learning_rate": 8.132512183527027e-06, | |
| "loss": 0.1018, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.7155519742143432, | |
| "grad_norm": 1.1935063600540161, | |
| "learning_rate": 8.110529523666712e-06, | |
| "loss": 0.1022, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.7187751813053989, | |
| "grad_norm": 1.2939246892929077, | |
| "learning_rate": 8.088448337259416e-06, | |
| "loss": 0.1049, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.7219983883964545, | |
| "grad_norm": 1.3576562404632568, | |
| "learning_rate": 8.066269323730435e-06, | |
| "loss": 0.0964, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.7252215954875101, | |
| "grad_norm": 1.2397035360336304, | |
| "learning_rate": 8.043993185603764e-06, | |
| "loss": 0.0949, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.7284448025785657, | |
| "grad_norm": 1.6794919967651367, | |
| "learning_rate": 8.021620628479833e-06, | |
| "loss": 0.0941, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.7316680096696213, | |
| "grad_norm": 1.9329454898834229, | |
| "learning_rate": 7.99915236101316e-06, | |
| "loss": 0.0929, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.7348912167606769, | |
| "grad_norm": 1.2772644758224487, | |
| "learning_rate": 7.976589094889903e-06, | |
| "loss": 0.1004, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.7381144238517324, | |
| "grad_norm": 1.1697113513946533, | |
| "learning_rate": 7.953931544805324e-06, | |
| "loss": 0.0905, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.741337630942788, | |
| "grad_norm": 1.7702858448028564, | |
| "learning_rate": 7.931180428441135e-06, | |
| "loss": 0.1052, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7445608380338437, | |
| "grad_norm": 1.3432146310806274, | |
| "learning_rate": 7.908336466442786e-06, | |
| "loss": 0.0919, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.7477840451248993, | |
| "grad_norm": 1.2473376989364624, | |
| "learning_rate": 7.885400382396621e-06, | |
| "loss": 0.0961, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.7510072522159549, | |
| "grad_norm": 2.3682289123535156, | |
| "learning_rate": 7.862372902806971e-06, | |
| "loss": 0.1042, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.7542304593070105, | |
| "grad_norm": 1.86495041847229, | |
| "learning_rate": 7.839254757073133e-06, | |
| "loss": 0.1009, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.7574536663980661, | |
| "grad_norm": 1.7069085836410522, | |
| "learning_rate": 7.816046677466269e-06, | |
| "loss": 0.1007, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.7606768734891217, | |
| "grad_norm": 1.8137654066085815, | |
| "learning_rate": 7.792749399106214e-06, | |
| "loss": 0.0927, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.7639000805801772, | |
| "grad_norm": 1.1234313249588013, | |
| "learning_rate": 7.769363659938186e-06, | |
| "loss": 0.0931, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.7671232876712328, | |
| "grad_norm": 1.3791865110397339, | |
| "learning_rate": 7.745890200709416e-06, | |
| "loss": 0.0973, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.7703464947622884, | |
| "grad_norm": 1.2439701557159424, | |
| "learning_rate": 7.722329764945682e-06, | |
| "loss": 0.1004, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.7735697018533441, | |
| "grad_norm": 1.2246594429016113, | |
| "learning_rate": 7.698683098927756e-06, | |
| "loss": 0.1078, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7767929089443997, | |
| "grad_norm": 1.09011709690094, | |
| "learning_rate": 7.674950951667773e-06, | |
| "loss": 0.0939, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.7800161160354553, | |
| "grad_norm": 1.0550169944763184, | |
| "learning_rate": 7.651134074885495e-06, | |
| "loss": 0.0982, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.7832393231265109, | |
| "grad_norm": 1.935786247253418, | |
| "learning_rate": 7.627233222984514e-06, | |
| "loss": 0.0973, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.7864625302175665, | |
| "grad_norm": 1.7208002805709839, | |
| "learning_rate": 7.603249153028335e-06, | |
| "loss": 0.098, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.7896857373086221, | |
| "grad_norm": 1.3723320960998535, | |
| "learning_rate": 7.579182624716422e-06, | |
| "loss": 0.1035, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.7929089443996776, | |
| "grad_norm": 1.1093083620071411, | |
| "learning_rate": 7.555034400360115e-06, | |
| "loss": 0.0906, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.7961321514907332, | |
| "grad_norm": 1.0704550743103027, | |
| "learning_rate": 7.530805244858492e-06, | |
| "loss": 0.0937, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.7993553585817889, | |
| "grad_norm": 1.111206293106079, | |
| "learning_rate": 7.506495925674135e-06, | |
| "loss": 0.11, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.8025785656728445, | |
| "grad_norm": 1.0680865049362183, | |
| "learning_rate": 7.482107212808829e-06, | |
| "loss": 0.0978, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.8058017727639001, | |
| "grad_norm": 1.2233189344406128, | |
| "learning_rate": 7.457639878779164e-06, | |
| "loss": 0.0901, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.8090249798549557, | |
| "grad_norm": 1.1432982683181763, | |
| "learning_rate": 7.433094698592069e-06, | |
| "loss": 0.1055, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.8122481869460113, | |
| "grad_norm": 1.0968226194381714, | |
| "learning_rate": 7.4084724497202675e-06, | |
| "loss": 0.0893, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.8154713940370669, | |
| "grad_norm": 1.416164755821228, | |
| "learning_rate": 7.383773912077639e-06, | |
| "loss": 0.1048, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.8186946011281225, | |
| "grad_norm": 1.4907201528549194, | |
| "learning_rate": 7.3589998679945274e-06, | |
| "loss": 0.0957, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.821917808219178, | |
| "grad_norm": 1.1113173961639404, | |
| "learning_rate": 7.3341511021929565e-06, | |
| "loss": 0.0891, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.8251410153102336, | |
| "grad_norm": 1.5791008472442627, | |
| "learning_rate": 7.30922840176177e-06, | |
| "loss": 0.0938, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.8283642224012893, | |
| "grad_norm": 1.1519147157669067, | |
| "learning_rate": 7.2842325561317064e-06, | |
| "loss": 0.0937, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.8315874294923449, | |
| "grad_norm": 1.0236375331878662, | |
| "learning_rate": 7.259164357050389e-06, | |
| "loss": 0.0859, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.8348106365834005, | |
| "grad_norm": 1.629459023475647, | |
| "learning_rate": 7.234024598557248e-06, | |
| "loss": 0.0902, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.8380338436744561, | |
| "grad_norm": 1.171321988105774, | |
| "learning_rate": 7.208814076958374e-06, | |
| "loss": 0.0887, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.8412570507655117, | |
| "grad_norm": 1.2966508865356445, | |
| "learning_rate": 7.183533590801286e-06, | |
| "loss": 0.0958, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.8444802578565673, | |
| "grad_norm": 1.7354567050933838, | |
| "learning_rate": 7.158183940849644e-06, | |
| "loss": 0.0967, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.8477034649476228, | |
| "grad_norm": 2.0169782638549805, | |
| "learning_rate": 7.132765930057886e-06, | |
| "loss": 0.0972, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.8509266720386784, | |
| "grad_norm": 1.147071123123169, | |
| "learning_rate": 7.107280363545785e-06, | |
| "loss": 0.0976, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.8541498791297341, | |
| "grad_norm": 1.391392707824707, | |
| "learning_rate": 7.08172804857296e-06, | |
| "loss": 0.0899, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.8573730862207897, | |
| "grad_norm": 1.1355257034301758, | |
| "learning_rate": 7.056109794513292e-06, | |
| "loss": 0.1036, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.8605962933118453, | |
| "grad_norm": 0.9366469979286194, | |
| "learning_rate": 7.030426412829296e-06, | |
| "loss": 0.088, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.8638195004029009, | |
| "grad_norm": 1.1631442308425903, | |
| "learning_rate": 7.004678717046419e-06, | |
| "loss": 0.0891, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.8670427074939565, | |
| "grad_norm": 1.429606318473816, | |
| "learning_rate": 6.978867522727264e-06, | |
| "loss": 0.1039, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.8702659145850121, | |
| "grad_norm": 1.1730060577392578, | |
| "learning_rate": 6.952993647445762e-06, | |
| "loss": 0.0931, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.8734891216760677, | |
| "grad_norm": 1.1138707399368286, | |
| "learning_rate": 6.927057910761273e-06, | |
| "loss": 0.0982, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.8767123287671232, | |
| "grad_norm": 1.2846705913543701, | |
| "learning_rate": 6.9010611341926286e-06, | |
| "loss": 0.0937, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.8799355358581789, | |
| "grad_norm": 1.1566565036773682, | |
| "learning_rate": 6.875004141192108e-06, | |
| "loss": 0.092, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.8831587429492345, | |
| "grad_norm": 1.17435622215271, | |
| "learning_rate": 6.848887757119358e-06, | |
| "loss": 0.0996, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.8863819500402901, | |
| "grad_norm": 1.250361442565918, | |
| "learning_rate": 6.822712809215247e-06, | |
| "loss": 0.099, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.8896051571313457, | |
| "grad_norm": 1.0037554502487183, | |
| "learning_rate": 6.7964801265756616e-06, | |
| "loss": 0.0873, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.8928283642224013, | |
| "grad_norm": 1.2579954862594604, | |
| "learning_rate": 6.770190540125246e-06, | |
| "loss": 0.0898, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.8960515713134569, | |
| "grad_norm": 1.5266188383102417, | |
| "learning_rate": 6.74384488259108e-06, | |
| "loss": 0.094, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.8992747784045125, | |
| "grad_norm": 1.5171687602996826, | |
| "learning_rate": 6.71744398847631e-06, | |
| "loss": 0.0924, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.9024979854955681, | |
| "grad_norm": 1.0802000761032104, | |
| "learning_rate": 6.690988694033707e-06, | |
| "loss": 0.0941, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9057211925866236, | |
| "grad_norm": 1.210917353630066, | |
| "learning_rate": 6.664479837239182e-06, | |
| "loss": 0.0885, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.9089443996776793, | |
| "grad_norm": 0.9679683446884155, | |
| "learning_rate": 6.63791825776524e-06, | |
| "loss": 0.0929, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.9121676067687349, | |
| "grad_norm": 1.1072417497634888, | |
| "learning_rate": 6.611304796954391e-06, | |
| "loss": 0.0907, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.9153908138597905, | |
| "grad_norm": 1.1007254123687744, | |
| "learning_rate": 6.58464029779249e-06, | |
| "loss": 0.0834, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.9186140209508461, | |
| "grad_norm": 1.3668423891067505, | |
| "learning_rate": 6.557925604882045e-06, | |
| "loss": 0.0996, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.9218372280419017, | |
| "grad_norm": 1.0063837766647339, | |
| "learning_rate": 6.531161564415455e-06, | |
| "loss": 0.0967, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.9250604351329573, | |
| "grad_norm": 2.1310322284698486, | |
| "learning_rate": 6.504349024148215e-06, | |
| "loss": 0.0891, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.9282836422240129, | |
| "grad_norm": 1.7086719274520874, | |
| "learning_rate": 6.4774888333720565e-06, | |
| "loss": 0.091, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.9315068493150684, | |
| "grad_norm": 1.2807854413986206, | |
| "learning_rate": 6.450581842888051e-06, | |
| "loss": 0.0945, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.934730056406124, | |
| "grad_norm": 1.2100774049758911, | |
| "learning_rate": 6.423628904979655e-06, | |
| "loss": 0.0927, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.9379532634971797, | |
| "grad_norm": 1.079419732093811, | |
| "learning_rate": 6.396630873385723e-06, | |
| "loss": 0.0928, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 1.2331191301345825, | |
| "learning_rate": 6.369588603273453e-06, | |
| "loss": 0.0902, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.9443996776792909, | |
| "grad_norm": 1.521287441253662, | |
| "learning_rate": 6.342502951211314e-06, | |
| "loss": 0.0906, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.9476228847703465, | |
| "grad_norm": 1.2149112224578857, | |
| "learning_rate": 6.315374775141897e-06, | |
| "loss": 0.088, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.9508460918614021, | |
| "grad_norm": 1.041596531867981, | |
| "learning_rate": 6.288204934354753e-06, | |
| "loss": 0.0903, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.9540692989524577, | |
| "grad_norm": 1.4294019937515259, | |
| "learning_rate": 6.26099428945917e-06, | |
| "loss": 0.0827, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.9572925060435133, | |
| "grad_norm": 1.353190302848816, | |
| "learning_rate": 6.2337437023569105e-06, | |
| "loss": 0.0892, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.9605157131345688, | |
| "grad_norm": 1.0870195627212524, | |
| "learning_rate": 6.206454036214914e-06, | |
| "loss": 0.1028, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.9637389202256245, | |
| "grad_norm": 0.9204868674278259, | |
| "learning_rate": 6.179126155437957e-06, | |
| "loss": 0.0929, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.9669621273166801, | |
| "grad_norm": 1.2136569023132324, | |
| "learning_rate": 6.151760925641268e-06, | |
| "loss": 0.0871, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9701853344077357, | |
| "grad_norm": 1.2971879243850708, | |
| "learning_rate": 6.124359213623114e-06, | |
| "loss": 0.0979, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.9734085414987913, | |
| "grad_norm": 1.1342357397079468, | |
| "learning_rate": 6.096921887337342e-06, | |
| "loss": 0.0821, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.9766317485898469, | |
| "grad_norm": 1.1401841640472412, | |
| "learning_rate": 6.0694498158658886e-06, | |
| "loss": 0.0853, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.9798549556809025, | |
| "grad_norm": 1.0659375190734863, | |
| "learning_rate": 6.041943869391248e-06, | |
| "loss": 0.092, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.9830781627719581, | |
| "grad_norm": 1.2414722442626953, | |
| "learning_rate": 6.0144049191689095e-06, | |
| "loss": 0.0943, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.9863013698630136, | |
| "grad_norm": 1.3391859531402588, | |
| "learning_rate": 5.9868338374997645e-06, | |
| "loss": 0.0846, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.9895245769540693, | |
| "grad_norm": 1.275088906288147, | |
| "learning_rate": 5.959231497702473e-06, | |
| "loss": 0.0976, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.9927477840451249, | |
| "grad_norm": 1.732020616531372, | |
| "learning_rate": 5.9315987740857995e-06, | |
| "loss": 0.0906, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.9959709911361805, | |
| "grad_norm": 1.2046090364456177, | |
| "learning_rate": 5.903936541920924e-06, | |
| "loss": 0.092, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.9991941982272361, | |
| "grad_norm": 0.9802199006080627, | |
| "learning_rate": 5.876245677413712e-06, | |
| "loss": 0.0815, | |
| "step": 1550 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3102, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 776, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.912061943183573e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |