| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 666, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.015015015015015015, |
| "grad_norm": 1.168221116065979, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 1.4071, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03003003003003003, |
| "grad_norm": 0.9412075281143188, |
| "learning_rate": 3.2142857142857143e-06, |
| "loss": 1.4071, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04504504504504504, |
| "grad_norm": 0.5820316672325134, |
| "learning_rate": 4.9999999999999996e-06, |
| "loss": 1.4308, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06006006006006006, |
| "grad_norm": 0.7234917283058167, |
| "learning_rate": 6.785714285714286e-06, |
| "loss": 1.403, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.07507507507507508, |
| "grad_norm": 0.7134122252464294, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 1.3703, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.09009009009009009, |
| "grad_norm": 0.5349926352500916, |
| "learning_rate": 1.0357142857142857e-05, |
| "loss": 1.3204, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10510510510510511, |
| "grad_norm": 0.5786008238792419, |
| "learning_rate": 1.2142857142857144e-05, |
| "loss": 1.2867, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.12012012012012012, |
| "grad_norm": 0.506987452507019, |
| "learning_rate": 1.3928571428571429e-05, |
| "loss": 1.3269, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13513513513513514, |
| "grad_norm": 0.5051132440567017, |
| "learning_rate": 1.5714285714285715e-05, |
| "loss": 1.3177, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.15015015015015015, |
| "grad_norm": 0.4060744643211365, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 1.1999, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.16516516516516516, |
| "grad_norm": 0.5537149906158447, |
| "learning_rate": 1.928571428571429e-05, |
| "loss": 1.304, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.18018018018018017, |
| "grad_norm": 0.5138696432113647, |
| "learning_rate": 2.107142857142857e-05, |
| "loss": 1.2876, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.19519519519519518, |
| "grad_norm": 0.501071035861969, |
| "learning_rate": 2.2857142857142858e-05, |
| "loss": 1.2592, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.21021021021021022, |
| "grad_norm": 0.505533754825592, |
| "learning_rate": 2.464285714285714e-05, |
| "loss": 1.2247, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22522522522522523, |
| "grad_norm": 0.5126124620437622, |
| "learning_rate": 2.6428571428571428e-05, |
| "loss": 1.2843, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.24024024024024024, |
| "grad_norm": 0.5336039066314697, |
| "learning_rate": 2.8214285714285714e-05, |
| "loss": 1.2725, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2552552552552553, |
| "grad_norm": 0.5905837416648865, |
| "learning_rate": 3e-05, |
| "loss": 1.2371, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2702702702702703, |
| "grad_norm": 1.3594383001327515, |
| "learning_rate": 2.9999259655754585e-05, |
| "loss": 1.1873, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2852852852852853, |
| "grad_norm": 0.5035964846611023, |
| "learning_rate": 2.9997038696099626e-05, |
| "loss": 1.1599, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3003003003003003, |
| "grad_norm": 0.5989760756492615, |
| "learning_rate": 2.9993337340271743e-05, |
| "loss": 1.2299, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3153153153153153, |
| "grad_norm": 0.6236782073974609, |
| "learning_rate": 2.9988155953641272e-05, |
| "loss": 1.125, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3303303303303303, |
| "grad_norm": 0.635779082775116, |
| "learning_rate": 2.998149504767618e-05, |
| "loss": 1.1218, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.34534534534534533, |
| "grad_norm": 0.6790189146995544, |
| "learning_rate": 2.9973355279891595e-05, |
| "loss": 1.1742, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.36036036036036034, |
| "grad_norm": 0.5752054452896118, |
| "learning_rate": 2.996373745378487e-05, |
| "loss": 1.1375, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.37537537537537535, |
| "grad_norm": 0.6132190227508545, |
| "learning_rate": 2.995264251875631e-05, |
| "loss": 1.1516, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.39039039039039036, |
| "grad_norm": 0.6133520603179932, |
| "learning_rate": 2.9940071570015415e-05, |
| "loss": 1.0994, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.40540540540540543, |
| "grad_norm": 0.634513795375824, |
| "learning_rate": 2.9926025848472798e-05, |
| "loss": 1.101, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.42042042042042044, |
| "grad_norm": 0.6447832584381104, |
| "learning_rate": 2.991050674061767e-05, |
| "loss": 1.059, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.43543543543543545, |
| "grad_norm": 0.671330451965332, |
| "learning_rate": 2.9893515778380997e-05, |
| "loss": 1.1333, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.45045045045045046, |
| "grad_norm": 0.7105020880699158, |
| "learning_rate": 2.9875054638984253e-05, |
| "loss": 1.0852, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.46546546546546547, |
| "grad_norm": 0.6580174565315247, |
| "learning_rate": 2.9855125144773885e-05, |
| "loss": 1.0793, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4804804804804805, |
| "grad_norm": 0.6566060185432434, |
| "learning_rate": 2.9833729263041407e-05, |
| "loss": 1.0913, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4954954954954955, |
| "grad_norm": 0.7263543605804443, |
| "learning_rate": 2.9810869105829202e-05, |
| "loss": 1.0181, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5105105105105106, |
| "grad_norm": 0.7267442345619202, |
| "learning_rate": 2.9786546929722055e-05, |
| "loss": 1.032, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5255255255255256, |
| "grad_norm": 0.8429233431816101, |
| "learning_rate": 2.9760765135624387e-05, |
| "loss": 0.9719, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5405405405405406, |
| "grad_norm": 0.7601214051246643, |
| "learning_rate": 2.9733526268523238e-05, |
| "loss": 1.0216, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.7685151100158691, |
| "learning_rate": 2.9704833017237077e-05, |
| "loss": 0.9199, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5705705705705706, |
| "grad_norm": 0.9268845915794373, |
| "learning_rate": 2.967468821415038e-05, |
| "loss": 1.0086, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5855855855855856, |
| "grad_norm": 0.9145250916481018, |
| "learning_rate": 2.9643094834933997e-05, |
| "loss": 0.946, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6006006006006006, |
| "grad_norm": 0.9080163240432739, |
| "learning_rate": 2.9610055998251473e-05, |
| "loss": 0.9132, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6156156156156156, |
| "grad_norm": 0.9134447574615479, |
| "learning_rate": 2.9575574965451156e-05, |
| "loss": 0.8778, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6306306306306306, |
| "grad_norm": 0.8887650370597839, |
| "learning_rate": 2.9539655140244263e-05, |
| "loss": 0.9295, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6456456456456456, |
| "grad_norm": 1.0790126323699951, |
| "learning_rate": 2.9502300068368922e-05, |
| "loss": 0.8752, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6606606606606606, |
| "grad_norm": 0.9577323794364929, |
| "learning_rate": 2.946351343724013e-05, |
| "loss": 0.8857, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6756756756756757, |
| "grad_norm": 0.9615657329559326, |
| "learning_rate": 2.9423299075585775e-05, |
| "loss": 0.9026, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6906906906906907, |
| "grad_norm": 0.8258851766586304, |
| "learning_rate": 2.9381660953068686e-05, |
| "loss": 0.9374, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7057057057057057, |
| "grad_norm": 0.8674429655075073, |
| "learning_rate": 2.9338603179894784e-05, |
| "loss": 0.9496, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7207207207207207, |
| "grad_norm": 0.9960718750953674, |
| "learning_rate": 2.929413000640735e-05, |
| "loss": 0.8618, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7357357357357357, |
| "grad_norm": 0.9150928258895874, |
| "learning_rate": 2.9248245822667457e-05, |
| "loss": 0.9003, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7507507507507507, |
| "grad_norm": 0.9323129057884216, |
| "learning_rate": 2.920095515802062e-05, |
| "loss": 0.8615, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7657657657657657, |
| "grad_norm": 1.002323031425476, |
| "learning_rate": 2.9152262680649704e-05, |
| "loss": 0.876, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7807807807807807, |
| "grad_norm": 1.0666371583938599, |
| "learning_rate": 2.9102173197114094e-05, |
| "loss": 0.8508, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7957957957957958, |
| "grad_norm": 0.9196386933326721, |
| "learning_rate": 2.9050691651875243e-05, |
| "loss": 0.8039, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8108108108108109, |
| "grad_norm": 0.9974318742752075, |
| "learning_rate": 2.8997823126808583e-05, |
| "loss": 0.832, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8258258258258259, |
| "grad_norm": 0.93741375207901, |
| "learning_rate": 2.894357284070189e-05, |
| "loss": 0.8235, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8408408408408409, |
| "grad_norm": 1.0261346101760864, |
| "learning_rate": 2.888794614874011e-05, |
| "loss": 0.803, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8558558558558559, |
| "grad_norm": 0.9743596911430359, |
| "learning_rate": 2.883094854197676e-05, |
| "loss": 0.7929, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8708708708708709, |
| "grad_norm": 1.0855077505111694, |
| "learning_rate": 2.877258564679185e-05, |
| "loss": 0.7591, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8858858858858859, |
| "grad_norm": 0.9973369836807251, |
| "learning_rate": 2.8712863224336533e-05, |
| "loss": 0.7941, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9009009009009009, |
| "grad_norm": 1.0347039699554443, |
| "learning_rate": 2.8651787169964374e-05, |
| "loss": 0.7256, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9159159159159159, |
| "grad_norm": 1.189362645149231, |
| "learning_rate": 2.8589363512649432e-05, |
| "loss": 0.7328, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9309309309309309, |
| "grad_norm": 1.0265212059020996, |
| "learning_rate": 2.8525598414391104e-05, |
| "loss": 0.7552, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9459459459459459, |
| "grad_norm": 1.0244401693344116, |
| "learning_rate": 2.846049816960585e-05, |
| "loss": 0.7563, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.960960960960961, |
| "grad_norm": 1.2380430698394775, |
| "learning_rate": 2.83940692045059e-05, |
| "loss": 0.779, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.975975975975976, |
| "grad_norm": 1.1021960973739624, |
| "learning_rate": 2.8326318076464852e-05, |
| "loss": 0.7572, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.990990990990991, |
| "grad_norm": 1.007034182548523, |
| "learning_rate": 2.8257251473370408e-05, |
| "loss": 0.7074, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.006006006006006, |
| "grad_norm": 1.062544345855713, |
| "learning_rate": 2.8186876212964185e-05, |
| "loss": 0.6919, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.021021021021021, |
| "grad_norm": 1.149069905281067, |
| "learning_rate": 2.811519924216873e-05, |
| "loss": 0.6393, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0360360360360361, |
| "grad_norm": 1.0859824419021606, |
| "learning_rate": 2.8042227636401757e-05, |
| "loss": 0.6682, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.0510510510510511, |
| "grad_norm": 1.0685064792633057, |
| "learning_rate": 2.796796859887772e-05, |
| "loss": 0.6079, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0660660660660661, |
| "grad_norm": 1.0562701225280762, |
| "learning_rate": 2.7892429459896766e-05, |
| "loss": 0.6154, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.0810810810810811, |
| "grad_norm": 1.1275193691253662, |
| "learning_rate": 2.7815617676121138e-05, |
| "loss": 0.6308, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0960960960960962, |
| "grad_norm": 1.0415915250778198, |
| "learning_rate": 2.773754082983912e-05, |
| "loss": 0.6441, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 1.1084564924240112, |
| "learning_rate": 2.7658206628216556e-05, |
| "loss": 0.5814, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1261261261261262, |
| "grad_norm": 1.0700006484985352, |
| "learning_rate": 2.7577622902536064e-05, |
| "loss": 0.5895, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.1411411411411412, |
| "grad_norm": 1.1039676666259766, |
| "learning_rate": 2.7495797607423986e-05, |
| "loss": 0.6004, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1561561561561562, |
| "grad_norm": 1.235087513923645, |
| "learning_rate": 2.7412738820065173e-05, |
| "loss": 0.6174, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.1711711711711712, |
| "grad_norm": 1.1676405668258667, |
| "learning_rate": 2.732845473940566e-05, |
| "loss": 0.5685, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.1861861861861862, |
| "grad_norm": 1.2204903364181519, |
| "learning_rate": 2.7242953685343327e-05, |
| "loss": 0.6005, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.2012012012012012, |
| "grad_norm": 1.1344414949417114, |
| "learning_rate": 2.7156244097906614e-05, |
| "loss": 0.6182, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2162162162162162, |
| "grad_norm": 1.2518144845962524, |
| "learning_rate": 2.7068334536421408e-05, |
| "loss": 0.5759, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.2312312312312312, |
| "grad_norm": 1.248169183731079, |
| "learning_rate": 2.6979233678666102e-05, |
| "loss": 0.5642, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.2462462462462462, |
| "grad_norm": 1.1738685369491577, |
| "learning_rate": 2.6888950320014993e-05, |
| "loss": 0.5953, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.2612612612612613, |
| "grad_norm": 1.3666422367095947, |
| "learning_rate": 2.6797493372570098e-05, |
| "loss": 0.5547, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.2762762762762763, |
| "grad_norm": 1.2075399160385132, |
| "learning_rate": 2.6704871864281377e-05, |
| "loss": 0.5817, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.2912912912912913, |
| "grad_norm": 1.1854655742645264, |
| "learning_rate": 2.6611094938055586e-05, |
| "loss": 0.5291, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.3063063063063063, |
| "grad_norm": 1.2468748092651367, |
| "learning_rate": 2.651617185085375e-05, |
| "loss": 0.5862, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.3213213213213213, |
| "grad_norm": 1.2312390804290771, |
| "learning_rate": 2.642011197277738e-05, |
| "loss": 0.5579, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.3363363363363363, |
| "grad_norm": 1.2079895734786987, |
| "learning_rate": 2.6322924786143544e-05, |
| "loss": 0.5381, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.3513513513513513, |
| "grad_norm": 1.2430604696273804, |
| "learning_rate": 2.6224619884548814e-05, |
| "loss": 0.5199, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3663663663663663, |
| "grad_norm": 1.3518890142440796, |
| "learning_rate": 2.612520697192229e-05, |
| "loss": 0.5885, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.3813813813813813, |
| "grad_norm": 1.2342760562896729, |
| "learning_rate": 2.6024695861567675e-05, |
| "loss": 0.5597, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.3963963963963963, |
| "grad_norm": 1.205000638961792, |
| "learning_rate": 2.592309647519458e-05, |
| "loss": 0.5427, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.4114114114114114, |
| "grad_norm": 1.304811716079712, |
| "learning_rate": 2.5820418841939152e-05, |
| "loss": 0.5439, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.4264264264264264, |
| "grad_norm": 1.3263685703277588, |
| "learning_rate": 2.5716673097374047e-05, |
| "loss": 0.5013, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.4414414414414414, |
| "grad_norm": 1.227352261543274, |
| "learning_rate": 2.5611869482507924e-05, |
| "loss": 0.5695, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.4564564564564564, |
| "grad_norm": 1.2734203338623047, |
| "learning_rate": 2.550601834277454e-05, |
| "loss": 0.5228, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.4714714714714714, |
| "grad_norm": 1.147456169128418, |
| "learning_rate": 2.539913012701152e-05, |
| "loss": 0.5174, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.4864864864864864, |
| "grad_norm": 1.2142225503921509, |
| "learning_rate": 2.529121538642892e-05, |
| "loss": 0.4837, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.5015015015015014, |
| "grad_norm": 1.177823543548584, |
| "learning_rate": 2.51822847735677e-05, |
| "loss": 0.5203, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.5165165165165164, |
| "grad_norm": 1.3306628465652466, |
| "learning_rate": 2.5072349041248175e-05, |
| "loss": 0.5125, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.5315315315315314, |
| "grad_norm": 1.3492076396942139, |
| "learning_rate": 2.496141904150859e-05, |
| "loss": 0.5235, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.5465465465465464, |
| "grad_norm": 1.0597307682037354, |
| "learning_rate": 2.484950572453386e-05, |
| "loss": 0.5086, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.5615615615615615, |
| "grad_norm": 1.1566152572631836, |
| "learning_rate": 2.4736620137574686e-05, |
| "loss": 0.4862, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.5765765765765765, |
| "grad_norm": 1.1374027729034424, |
| "learning_rate": 2.4622773423857032e-05, |
| "loss": 0.5468, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.5915915915915915, |
| "grad_norm": 1.0723894834518433, |
| "learning_rate": 2.4507976821482138e-05, |
| "loss": 0.523, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.6066066066066065, |
| "grad_norm": 1.4630274772644043, |
| "learning_rate": 2.4392241662317205e-05, |
| "loss": 0.4643, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.6216216216216215, |
| "grad_norm": 1.3217425346374512, |
| "learning_rate": 2.4275579370876772e-05, |
| "loss": 0.489, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.6366366366366365, |
| "grad_norm": 1.301561951637268, |
| "learning_rate": 2.4158001463194998e-05, |
| "loss": 0.4548, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.6516516516516515, |
| "grad_norm": 1.132656455039978, |
| "learning_rate": 2.4039519545688848e-05, |
| "loss": 0.4805, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 1.1595160961151123, |
| "learning_rate": 2.392014531401244e-05, |
| "loss": 0.5021, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.6816816816816815, |
| "grad_norm": 1.075697660446167, |
| "learning_rate": 2.37998905519025e-05, |
| "loss": 0.4383, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.6966966966966965, |
| "grad_norm": 1.2260234355926514, |
| "learning_rate": 2.3678767130015174e-05, |
| "loss": 0.4359, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.7117117117117115, |
| "grad_norm": 1.1868109703063965, |
| "learning_rate": 2.3556787004754253e-05, |
| "loss": 0.4411, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.7267267267267268, |
| "grad_norm": 1.1722155809402466, |
| "learning_rate": 2.3433962217090904e-05, |
| "loss": 0.4586, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.7417417417417418, |
| "grad_norm": 1.1019175052642822, |
| "learning_rate": 2.3310304891375092e-05, |
| "loss": 0.4072, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.7567567567567568, |
| "grad_norm": 1.2503912448883057, |
| "learning_rate": 2.3185827234138756e-05, |
| "loss": 0.4602, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.7717717717717718, |
| "grad_norm": 1.1760993003845215, |
| "learning_rate": 2.306054153289085e-05, |
| "loss": 0.4525, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.7867867867867868, |
| "grad_norm": 1.2905851602554321, |
| "learning_rate": 2.2934460154904436e-05, |
| "loss": 0.4767, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.8018018018018018, |
| "grad_norm": 1.2083145380020142, |
| "learning_rate": 2.280759554599587e-05, |
| "loss": 0.4617, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.8168168168168168, |
| "grad_norm": 1.2210261821746826, |
| "learning_rate": 2.2679960229296244e-05, |
| "loss": 0.4468, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.8318318318318318, |
| "grad_norm": 1.238731026649475, |
| "learning_rate": 2.255156680401518e-05, |
| "loss": 0.4182, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.8468468468468469, |
| "grad_norm": 1.33894944190979, |
| "learning_rate": 2.242242794419715e-05, |
| "loss": 0.4642, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.8618618618618619, |
| "grad_norm": 1.2281477451324463, |
| "learning_rate": 2.2292556397470394e-05, |
| "loss": 0.445, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.8768768768768769, |
| "grad_norm": 1.1752314567565918, |
| "learning_rate": 2.2161964983788535e-05, |
| "loss": 0.3754, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.8918918918918919, |
| "grad_norm": 1.0684999227523804, |
| "learning_rate": 2.2030666594165135e-05, |
| "loss": 0.4132, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.906906906906907, |
| "grad_norm": 1.2152754068374634, |
| "learning_rate": 2.1898674189401148e-05, |
| "loss": 0.3813, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.921921921921922, |
| "grad_norm": 1.2044199705123901, |
| "learning_rate": 2.1766000798805542e-05, |
| "loss": 0.4362, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.936936936936937, |
| "grad_norm": 1.1659032106399536, |
| "learning_rate": 2.1632659518909156e-05, |
| "loss": 0.3985, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.951951951951952, |
| "grad_norm": 1.2562652826309204, |
| "learning_rate": 2.1498663512171885e-05, |
| "loss": 0.3964, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.966966966966967, |
| "grad_norm": 1.1590440273284912, |
| "learning_rate": 2.13640260056834e-05, |
| "loss": 0.3917, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.981981981981982, |
| "grad_norm": 1.2917598485946655, |
| "learning_rate": 2.1228760289857456e-05, |
| "loss": 0.3642, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.996996996996997, |
| "grad_norm": 1.40131413936615, |
| "learning_rate": 2.1092879717119955e-05, |
| "loss": 0.4154, |
| "step": 665 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 1665, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.581609998216069e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|