| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9998780636507743, | |
| "eval_steps": 500, | |
| "global_step": 1025, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0009754907938056335, | |
| "grad_norm": 4.439117513053022, | |
| "learning_rate": 9.615384615384617e-08, | |
| "loss": 0.3691, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.001950981587611267, | |
| "grad_norm": 4.665976578311697, | |
| "learning_rate": 1.9230769230769234e-07, | |
| "loss": 0.3804, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0029264723814169005, | |
| "grad_norm": 4.458573970698933, | |
| "learning_rate": 2.884615384615385e-07, | |
| "loss": 0.3682, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.003901963175222534, | |
| "grad_norm": 4.21755765446574, | |
| "learning_rate": 3.846153846153847e-07, | |
| "loss": 0.3567, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.004877453969028167, | |
| "grad_norm": 4.1944148540591195, | |
| "learning_rate": 4.807692307692308e-07, | |
| "loss": 0.3773, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.005852944762833801, | |
| "grad_norm": 3.9774353472382957, | |
| "learning_rate": 5.76923076923077e-07, | |
| "loss": 0.3502, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.006828435556639434, | |
| "grad_norm": 3.553247347541323, | |
| "learning_rate": 6.730769230769231e-07, | |
| "loss": 0.3614, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.007803926350445068, | |
| "grad_norm": 2.949302152008964, | |
| "learning_rate": 7.692307692307694e-07, | |
| "loss": 0.3463, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.008779417144250701, | |
| "grad_norm": 2.799468109303019, | |
| "learning_rate": 8.653846153846154e-07, | |
| "loss": 0.3582, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.009754907938056334, | |
| "grad_norm": 2.6971355767992464, | |
| "learning_rate": 9.615384615384617e-07, | |
| "loss": 0.3277, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.010730398731861967, | |
| "grad_norm": 2.2435805544189256, | |
| "learning_rate": 1.0576923076923078e-06, | |
| "loss": 0.3411, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.011705889525667602, | |
| "grad_norm": 2.3858778520973933, | |
| "learning_rate": 1.153846153846154e-06, | |
| "loss": 0.3323, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.012681380319473235, | |
| "grad_norm": 2.325354329209325, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.3201, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.013656871113278868, | |
| "grad_norm": 2.268837198744112, | |
| "learning_rate": 1.3461538461538462e-06, | |
| "loss": 0.3458, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.014632361907084501, | |
| "grad_norm": 2.2354597650236263, | |
| "learning_rate": 1.4423076923076922e-06, | |
| "loss": 0.3057, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.015607852700890136, | |
| "grad_norm": 2.4354294587165968, | |
| "learning_rate": 1.5384615384615387e-06, | |
| "loss": 0.2974, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.016583343494695767, | |
| "grad_norm": 1.9801626818691969, | |
| "learning_rate": 1.6346153846153848e-06, | |
| "loss": 0.2738, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.017558834288501402, | |
| "grad_norm": 1.7527920071956236, | |
| "learning_rate": 1.7307692307692308e-06, | |
| "loss": 0.268, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.018534325082307037, | |
| "grad_norm": 1.6175943655326306, | |
| "learning_rate": 1.826923076923077e-06, | |
| "loss": 0.2851, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.01950981587611267, | |
| "grad_norm": 1.8343780524193107, | |
| "learning_rate": 1.9230769230769234e-06, | |
| "loss": 0.2773, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.020485306669918303, | |
| "grad_norm": 2.0687178014460055, | |
| "learning_rate": 2.0192307692307692e-06, | |
| "loss": 0.279, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.021460797463723934, | |
| "grad_norm": 1.850604452835117, | |
| "learning_rate": 2.1153846153846155e-06, | |
| "loss": 0.2743, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02243628825752957, | |
| "grad_norm": 2.149994632477382, | |
| "learning_rate": 2.211538461538462e-06, | |
| "loss": 0.2694, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.023411779051335204, | |
| "grad_norm": 1.8792389072954687, | |
| "learning_rate": 2.307692307692308e-06, | |
| "loss": 0.2644, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.024387269845140835, | |
| "grad_norm": 1.457358625174777, | |
| "learning_rate": 2.403846153846154e-06, | |
| "loss": 0.2419, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.02536276063894647, | |
| "grad_norm": 1.7014683746851735, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.2544, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.026338251432752105, | |
| "grad_norm": 1.3815851595211681, | |
| "learning_rate": 2.5961538461538465e-06, | |
| "loss": 0.223, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.027313742226557736, | |
| "grad_norm": 1.3007184555893962, | |
| "learning_rate": 2.6923076923076923e-06, | |
| "loss": 0.2456, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.02828923302036337, | |
| "grad_norm": 1.4472785639522512, | |
| "learning_rate": 2.7884615384615386e-06, | |
| "loss": 0.2516, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.029264723814169002, | |
| "grad_norm": 1.3652941522006306, | |
| "learning_rate": 2.8846153846153845e-06, | |
| "loss": 0.2416, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.030240214607974637, | |
| "grad_norm": 1.2522050977996877, | |
| "learning_rate": 2.980769230769231e-06, | |
| "loss": 0.2329, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.031215705401780272, | |
| "grad_norm": 1.1304756888064047, | |
| "learning_rate": 3.0769230769230774e-06, | |
| "loss": 0.2217, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03219119619558591, | |
| "grad_norm": 1.4050382297283845, | |
| "learning_rate": 3.1730769230769233e-06, | |
| "loss": 0.2309, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.033166686989391535, | |
| "grad_norm": 1.0865416261086687, | |
| "learning_rate": 3.2692307692307696e-06, | |
| "loss": 0.2217, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.03414217778319717, | |
| "grad_norm": 1.303187815201321, | |
| "learning_rate": 3.365384615384616e-06, | |
| "loss": 0.2338, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.035117668577002804, | |
| "grad_norm": 1.2021321704753516, | |
| "learning_rate": 3.4615384615384617e-06, | |
| "loss": 0.2137, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.03609315937080844, | |
| "grad_norm": 1.1561740561669396, | |
| "learning_rate": 3.557692307692308e-06, | |
| "loss": 0.2162, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.037068650164614074, | |
| "grad_norm": 1.287983442354845, | |
| "learning_rate": 3.653846153846154e-06, | |
| "loss": 0.228, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0380441409584197, | |
| "grad_norm": 1.0789644716188591, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.2106, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.03901963175222534, | |
| "grad_norm": 1.1684419312705403, | |
| "learning_rate": 3.846153846153847e-06, | |
| "loss": 0.218, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03999512254603097, | |
| "grad_norm": 1.1195832124738565, | |
| "learning_rate": 3.942307692307692e-06, | |
| "loss": 0.2032, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.040970613339836606, | |
| "grad_norm": 1.0236100570446467, | |
| "learning_rate": 4.0384615384615385e-06, | |
| "loss": 0.217, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04194610413364224, | |
| "grad_norm": 1.182743593377574, | |
| "learning_rate": 4.134615384615385e-06, | |
| "loss": 0.2215, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04292159492744787, | |
| "grad_norm": 1.1077853276119973, | |
| "learning_rate": 4.230769230769231e-06, | |
| "loss": 0.2138, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.043897085721253504, | |
| "grad_norm": 1.1863700432181472, | |
| "learning_rate": 4.326923076923077e-06, | |
| "loss": 0.2113, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.04487257651505914, | |
| "grad_norm": 1.1214318092951887, | |
| "learning_rate": 4.423076923076924e-06, | |
| "loss": 0.2106, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.04584806730886477, | |
| "grad_norm": 1.0805493864146463, | |
| "learning_rate": 4.51923076923077e-06, | |
| "loss": 0.2104, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.04682355810267041, | |
| "grad_norm": 1.0336295929522208, | |
| "learning_rate": 4.615384615384616e-06, | |
| "loss": 0.1981, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.04779904889647604, | |
| "grad_norm": 1.119869757376067, | |
| "learning_rate": 4.711538461538462e-06, | |
| "loss": 0.2014, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.04877453969028167, | |
| "grad_norm": 1.030817002409712, | |
| "learning_rate": 4.807692307692308e-06, | |
| "loss": 0.1996, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.049750030484087306, | |
| "grad_norm": 1.0055854671916695, | |
| "learning_rate": 4.903846153846154e-06, | |
| "loss": 0.2052, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.05072552127789294, | |
| "grad_norm": 1.107516098502413, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2168, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.051701012071698575, | |
| "grad_norm": 1.0591849818221413, | |
| "learning_rate": 4.9999869688212956e-06, | |
| "loss": 0.2099, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.05267650286550421, | |
| "grad_norm": 1.0486177613460919, | |
| "learning_rate": 4.999947875421032e-06, | |
| "loss": 0.1978, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.05365199365930984, | |
| "grad_norm": 1.0333038397384442, | |
| "learning_rate": 4.999882720206755e-06, | |
| "loss": 0.1934, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.05462748445311547, | |
| "grad_norm": 1.0394851360953357, | |
| "learning_rate": 4.999791503857704e-06, | |
| "loss": 0.2019, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.05560297524692111, | |
| "grad_norm": 1.1046443338767855, | |
| "learning_rate": 4.999674227324805e-06, | |
| "loss": 0.2044, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.05657846604072674, | |
| "grad_norm": 1.2371807932970489, | |
| "learning_rate": 4.9995308918306595e-06, | |
| "loss": 0.1972, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.05755395683453238, | |
| "grad_norm": 0.9905094942349434, | |
| "learning_rate": 4.99936149886953e-06, | |
| "loss": 0.19, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.058529447628338005, | |
| "grad_norm": 1.005308785484947, | |
| "learning_rate": 4.999166050207331e-06, | |
| "loss": 0.1838, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05950493842214364, | |
| "grad_norm": 1.0197164862121335, | |
| "learning_rate": 4.9989445478816e-06, | |
| "loss": 0.1906, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.060480429215949275, | |
| "grad_norm": 1.112996579159861, | |
| "learning_rate": 4.99869699420149e-06, | |
| "loss": 0.1949, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06145592000975491, | |
| "grad_norm": 0.9210560581443838, | |
| "learning_rate": 4.998423391747731e-06, | |
| "loss": 0.1869, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.062431410803560544, | |
| "grad_norm": 0.994708396507738, | |
| "learning_rate": 4.998123743372615e-06, | |
| "loss": 0.1832, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.06340690159736617, | |
| "grad_norm": 1.000252041680365, | |
| "learning_rate": 4.997798052199959e-06, | |
| "loss": 0.2011, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.06438239239117181, | |
| "grad_norm": 1.001566090006205, | |
| "learning_rate": 4.9974463216250735e-06, | |
| "loss": 0.1851, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.06535788318497744, | |
| "grad_norm": 1.0035711549242645, | |
| "learning_rate": 4.9970685553147316e-06, | |
| "loss": 0.1959, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.06633337397878307, | |
| "grad_norm": 1.0321021956412775, | |
| "learning_rate": 4.996664757207124e-06, | |
| "loss": 0.1948, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.06730886477258871, | |
| "grad_norm": 1.1196394074729383, | |
| "learning_rate": 4.996234931511823e-06, | |
| "loss": 0.1959, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.06828435556639434, | |
| "grad_norm": 1.204621557285872, | |
| "learning_rate": 4.995779082709739e-06, | |
| "loss": 0.1962, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06925984636019998, | |
| "grad_norm": 1.0177518163557318, | |
| "learning_rate": 4.995297215553067e-06, | |
| "loss": 0.1806, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.07023533715400561, | |
| "grad_norm": 1.0847615883655475, | |
| "learning_rate": 4.994789335065245e-06, | |
| "loss": 0.2044, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07121082794781124, | |
| "grad_norm": 1.168873122167307, | |
| "learning_rate": 4.9942554465409e-06, | |
| "loss": 0.1934, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.07218631874161688, | |
| "grad_norm": 1.019104243672844, | |
| "learning_rate": 4.993695555545789e-06, | |
| "loss": 0.1905, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0731618095354225, | |
| "grad_norm": 1.1308032239714556, | |
| "learning_rate": 4.993109667916742e-06, | |
| "loss": 0.193, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07413730032922815, | |
| "grad_norm": 1.1401766992269902, | |
| "learning_rate": 4.992497789761606e-06, | |
| "loss": 0.1793, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.07511279112303378, | |
| "grad_norm": 1.1144999371666817, | |
| "learning_rate": 4.991859927459174e-06, | |
| "loss": 0.2023, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.0760882819168394, | |
| "grad_norm": 1.191890901059724, | |
| "learning_rate": 4.991196087659125e-06, | |
| "loss": 0.1858, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.07706377271064505, | |
| "grad_norm": 1.11445392411597, | |
| "learning_rate": 4.990506277281952e-06, | |
| "loss": 0.1843, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.07803926350445067, | |
| "grad_norm": 1.1630978983896232, | |
| "learning_rate": 4.989790503518888e-06, | |
| "loss": 0.1775, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07901475429825632, | |
| "grad_norm": 1.3514121202700597, | |
| "learning_rate": 4.989048773831834e-06, | |
| "loss": 0.1879, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.07999024509206194, | |
| "grad_norm": 1.0138569119140257, | |
| "learning_rate": 4.988281095953279e-06, | |
| "loss": 0.1712, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.08096573588586757, | |
| "grad_norm": 1.2876601226086022, | |
| "learning_rate": 4.987487477886221e-06, | |
| "loss": 0.1806, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.08194122667967321, | |
| "grad_norm": 1.0842905018940538, | |
| "learning_rate": 4.986667927904084e-06, | |
| "loss": 0.172, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.08291671747347884, | |
| "grad_norm": 1.0011346482797712, | |
| "learning_rate": 4.985822454550629e-06, | |
| "loss": 0.1775, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08389220826728448, | |
| "grad_norm": 1.322374107432227, | |
| "learning_rate": 4.984951066639869e-06, | |
| "loss": 0.1872, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.08486769906109011, | |
| "grad_norm": 1.0531176300373897, | |
| "learning_rate": 4.984053773255971e-06, | |
| "loss": 0.1848, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.08584318985489574, | |
| "grad_norm": 1.0696103709835127, | |
| "learning_rate": 4.9831305837531684e-06, | |
| "loss": 0.1789, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.08681868064870138, | |
| "grad_norm": 1.048271406336639, | |
| "learning_rate": 4.98218150775566e-06, | |
| "loss": 0.1763, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.08779417144250701, | |
| "grad_norm": 0.9981980018547214, | |
| "learning_rate": 4.9812065551575075e-06, | |
| "loss": 0.1746, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.08876966223631265, | |
| "grad_norm": 1.0986911182229246, | |
| "learning_rate": 4.9802057361225375e-06, | |
| "loss": 0.2019, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.08974515303011828, | |
| "grad_norm": 1.0482728855361627, | |
| "learning_rate": 4.97917906108423e-06, | |
| "loss": 0.1725, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.0907206438239239, | |
| "grad_norm": 1.0599473411074543, | |
| "learning_rate": 4.978126540745615e-06, | |
| "loss": 0.1804, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.09169613461772955, | |
| "grad_norm": 1.078553827467801, | |
| "learning_rate": 4.977048186079155e-06, | |
| "loss": 0.2011, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.09267162541153517, | |
| "grad_norm": 1.0444452777467914, | |
| "learning_rate": 4.975944008326638e-06, | |
| "loss": 0.1835, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09364711620534082, | |
| "grad_norm": 1.0791056951072164, | |
| "learning_rate": 4.974814018999053e-06, | |
| "loss": 0.1824, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.09462260699914644, | |
| "grad_norm": 1.078717583871865, | |
| "learning_rate": 4.973658229876476e-06, | |
| "loss": 0.189, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.09559809779295209, | |
| "grad_norm": 1.124139067655016, | |
| "learning_rate": 4.97247665300794e-06, | |
| "loss": 0.1832, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.09657358858675771, | |
| "grad_norm": 1.0195889033855914, | |
| "learning_rate": 4.971269300711318e-06, | |
| "loss": 0.1766, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.09754907938056334, | |
| "grad_norm": 1.1122862478289546, | |
| "learning_rate": 4.970036185573189e-06, | |
| "loss": 0.1805, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09852457017436898, | |
| "grad_norm": 1.1044026960421833, | |
| "learning_rate": 4.968777320448707e-06, | |
| "loss": 0.1836, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.09950006096817461, | |
| "grad_norm": 1.204423675193318, | |
| "learning_rate": 4.96749271846147e-06, | |
| "loss": 0.1767, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.10047555176198025, | |
| "grad_norm": 1.0248442266116176, | |
| "learning_rate": 4.96618239300338e-06, | |
| "loss": 0.1912, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.10145104255578588, | |
| "grad_norm": 1.1740907364465507, | |
| "learning_rate": 4.964846357734505e-06, | |
| "loss": 0.1726, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.10242653334959151, | |
| "grad_norm": 0.9983984379050335, | |
| "learning_rate": 4.963484626582937e-06, | |
| "loss": 0.1787, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.10340202414339715, | |
| "grad_norm": 1.0692395971458715, | |
| "learning_rate": 4.9620972137446456e-06, | |
| "loss": 0.179, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.10437751493720278, | |
| "grad_norm": 1.0955109652052737, | |
| "learning_rate": 4.96068413368333e-06, | |
| "loss": 0.1765, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.10535300573100842, | |
| "grad_norm": 1.0684942634997572, | |
| "learning_rate": 4.959245401130269e-06, | |
| "loss": 0.172, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.10632849652481405, | |
| "grad_norm": 0.9995901623406969, | |
| "learning_rate": 4.957781031084169e-06, | |
| "loss": 0.1805, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.10730398731861968, | |
| "grad_norm": 0.9323445364396948, | |
| "learning_rate": 4.956291038811003e-06, | |
| "loss": 0.1853, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.10827947811242532, | |
| "grad_norm": 1.1033013476735185, | |
| "learning_rate": 4.954775439843855e-06, | |
| "loss": 0.1873, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.10925496890623095, | |
| "grad_norm": 1.1691417741452548, | |
| "learning_rate": 4.9532342499827584e-06, | |
| "loss": 0.185, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.11023045970003659, | |
| "grad_norm": 1.1918610910255243, | |
| "learning_rate": 4.95166748529453e-06, | |
| "loss": 0.1892, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.11120595049384221, | |
| "grad_norm": 1.1014111858940245, | |
| "learning_rate": 4.950075162112602e-06, | |
| "loss": 0.1735, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.11218144128764784, | |
| "grad_norm": 1.216567039997431, | |
| "learning_rate": 4.9484572970368516e-06, | |
| "loss": 0.1838, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.11315693208145348, | |
| "grad_norm": 1.1097171959187067, | |
| "learning_rate": 4.946813906933432e-06, | |
| "loss": 0.1874, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.11413242287525911, | |
| "grad_norm": 0.967977115534241, | |
| "learning_rate": 4.94514500893459e-06, | |
| "loss": 0.1738, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.11510791366906475, | |
| "grad_norm": 0.9657457430010942, | |
| "learning_rate": 4.943450620438491e-06, | |
| "loss": 0.1752, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.11608340446287038, | |
| "grad_norm": 0.9166596111467156, | |
| "learning_rate": 4.941730759109041e-06, | |
| "loss": 0.1818, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.11705889525667601, | |
| "grad_norm": 0.9450152131596894, | |
| "learning_rate": 4.939985442875695e-06, | |
| "loss": 0.1799, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.11803438605048165, | |
| "grad_norm": 0.938177849072337, | |
| "learning_rate": 4.938214689933276e-06, | |
| "loss": 0.1735, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.11900987684428728, | |
| "grad_norm": 1.0415880049786728, | |
| "learning_rate": 4.93641851874178e-06, | |
| "loss": 0.1868, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.11998536763809292, | |
| "grad_norm": 0.9746213204544258, | |
| "learning_rate": 4.9345969480261925e-06, | |
| "loss": 0.1829, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.12096085843189855, | |
| "grad_norm": 0.9444433221181193, | |
| "learning_rate": 4.932749996776282e-06, | |
| "loss": 0.1731, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.12193634922570418, | |
| "grad_norm": 1.0120487885636702, | |
| "learning_rate": 4.93087768424641e-06, | |
| "loss": 0.1886, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.12291184001950982, | |
| "grad_norm": 0.9837069724067731, | |
| "learning_rate": 4.928980029955329e-06, | |
| "loss": 0.1821, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.12388733081331545, | |
| "grad_norm": 0.9425235884710698, | |
| "learning_rate": 4.927057053685975e-06, | |
| "loss": 0.1801, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.12486282160712109, | |
| "grad_norm": 1.0113092843870408, | |
| "learning_rate": 4.925108775485269e-06, | |
| "loss": 0.1796, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.12583831240092672, | |
| "grad_norm": 1.0163088829406406, | |
| "learning_rate": 4.923135215663897e-06, | |
| "loss": 0.1654, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.12681380319473234, | |
| "grad_norm": 1.164966417682963, | |
| "learning_rate": 4.921136394796109e-06, | |
| "loss": 0.1803, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.12778929398853797, | |
| "grad_norm": 1.1687221264442198, | |
| "learning_rate": 4.919112333719498e-06, | |
| "loss": 0.1878, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.12876478478234363, | |
| "grad_norm": 0.9888510085952801, | |
| "learning_rate": 4.9170630535347866e-06, | |
| "loss": 0.1771, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.12974027557614926, | |
| "grad_norm": 1.2032250564065257, | |
| "learning_rate": 4.914988575605602e-06, | |
| "loss": 0.1893, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.13071576636995488, | |
| "grad_norm": 0.9809911156410793, | |
| "learning_rate": 4.91288892155826e-06, | |
| "loss": 0.1728, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.1316912571637605, | |
| "grad_norm": 1.2533075523741326, | |
| "learning_rate": 4.910764113281533e-06, | |
| "loss": 0.1836, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.13266674795756614, | |
| "grad_norm": 1.0029237484325686, | |
| "learning_rate": 4.908614172926426e-06, | |
| "loss": 0.1757, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1336422387513718, | |
| "grad_norm": 1.0323575219354129, | |
| "learning_rate": 4.906439122905946e-06, | |
| "loss": 0.161, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.13461772954517742, | |
| "grad_norm": 0.9974877118091979, | |
| "learning_rate": 4.904238985894864e-06, | |
| "loss": 0.1644, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.13559322033898305, | |
| "grad_norm": 0.8968544196822601, | |
| "learning_rate": 4.902013784829483e-06, | |
| "loss": 0.1541, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.13656871113278868, | |
| "grad_norm": 1.067306106970932, | |
| "learning_rate": 4.899763542907399e-06, | |
| "loss": 0.1689, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1375442019265943, | |
| "grad_norm": 0.9831550732650544, | |
| "learning_rate": 4.897488283587253e-06, | |
| "loss": 0.1764, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.13851969272039996, | |
| "grad_norm": 1.098512924207099, | |
| "learning_rate": 4.895188030588495e-06, | |
| "loss": 0.1671, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.1394951835142056, | |
| "grad_norm": 0.9813597480687253, | |
| "learning_rate": 4.892862807891131e-06, | |
| "loss": 0.1723, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.14047067430801122, | |
| "grad_norm": 0.9059688793766486, | |
| "learning_rate": 4.890512639735475e-06, | |
| "loss": 0.1585, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.14144616510181685, | |
| "grad_norm": 0.9421090522610057, | |
| "learning_rate": 4.888137550621897e-06, | |
| "loss": 0.1667, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.14242165589562247, | |
| "grad_norm": 0.9984203476569828, | |
| "learning_rate": 4.885737565310565e-06, | |
| "loss": 0.1748, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.14339714668942813, | |
| "grad_norm": 0.9344709716653912, | |
| "learning_rate": 4.883312708821188e-06, | |
| "loss": 0.172, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.14437263748323376, | |
| "grad_norm": 0.995510093417273, | |
| "learning_rate": 4.880863006432758e-06, | |
| "loss": 0.1704, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.14534812827703938, | |
| "grad_norm": 0.9658034107481427, | |
| "learning_rate": 4.878388483683281e-06, | |
| "loss": 0.1779, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.146323619070845, | |
| "grad_norm": 1.0283682187427396, | |
| "learning_rate": 4.8758891663695165e-06, | |
| "loss": 0.1733, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.14729910986465064, | |
| "grad_norm": 1.0848455109349466, | |
| "learning_rate": 4.873365080546706e-06, | |
| "loss": 0.1883, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.1482746006584563, | |
| "grad_norm": 1.0034493473399255, | |
| "learning_rate": 4.870816252528298e-06, | |
| "loss": 0.1725, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.14925009145226192, | |
| "grad_norm": 1.0714206307709666, | |
| "learning_rate": 4.868242708885681e-06, | |
| "loss": 0.1855, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.15022558224606755, | |
| "grad_norm": 1.0894809817408557, | |
| "learning_rate": 4.8656444764479005e-06, | |
| "loss": 0.1815, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.15120107303987318, | |
| "grad_norm": 1.0188695855109409, | |
| "learning_rate": 4.863021582301381e-06, | |
| "loss": 0.171, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.1521765638336788, | |
| "grad_norm": 1.0015265493016945, | |
| "learning_rate": 4.860374053789643e-06, | |
| "loss": 0.1703, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.15315205462748446, | |
| "grad_norm": 0.9798681079088354, | |
| "learning_rate": 4.857701918513023e-06, | |
| "loss": 0.1762, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1541275454212901, | |
| "grad_norm": 0.9770978700089492, | |
| "learning_rate": 4.855005204328378e-06, | |
| "loss": 0.159, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.15510303621509572, | |
| "grad_norm": 1.0074537647270105, | |
| "learning_rate": 4.8522839393487976e-06, | |
| "loss": 0.1701, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.15607852700890135, | |
| "grad_norm": 0.983344185894382, | |
| "learning_rate": 4.849538151943316e-06, | |
| "loss": 0.1742, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.15705401780270697, | |
| "grad_norm": 0.9614400815443347, | |
| "learning_rate": 4.84676787073661e-06, | |
| "loss": 0.173, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.15802950859651263, | |
| "grad_norm": 0.9526818664956649, | |
| "learning_rate": 4.843973124608703e-06, | |
| "loss": 0.1732, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.15900499939031826, | |
| "grad_norm": 0.8985619765911587, | |
| "learning_rate": 4.841153942694664e-06, | |
| "loss": 0.1665, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.15998049018412389, | |
| "grad_norm": 0.9257494791929499, | |
| "learning_rate": 4.838310354384304e-06, | |
| "loss": 0.1684, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.1609559809779295, | |
| "grad_norm": 0.9783281983993328, | |
| "learning_rate": 4.835442389321867e-06, | |
| "loss": 0.1692, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.16193147177173514, | |
| "grad_norm": 0.8866263340546827, | |
| "learning_rate": 4.832550077405727e-06, | |
| "loss": 0.1563, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1629069625655408, | |
| "grad_norm": 1.0759800513612392, | |
| "learning_rate": 4.829633448788072e-06, | |
| "loss": 0.1796, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.16388245335934642, | |
| "grad_norm": 1.0415739389017458, | |
| "learning_rate": 4.826692533874586e-06, | |
| "loss": 0.1736, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.16485794415315205, | |
| "grad_norm": 1.0321014340070922, | |
| "learning_rate": 4.823727363324142e-06, | |
| "loss": 0.1763, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.16583343494695768, | |
| "grad_norm": 1.149401130099103, | |
| "learning_rate": 4.820737968048471e-06, | |
| "loss": 0.1735, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1668089257407633, | |
| "grad_norm": 0.9668062539942579, | |
| "learning_rate": 4.8177243792118515e-06, | |
| "loss": 0.1843, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.16778441653456896, | |
| "grad_norm": 1.0145067381113042, | |
| "learning_rate": 4.8146866282307725e-06, | |
| "loss": 0.1719, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.1687599073283746, | |
| "grad_norm": 0.9146830744890082, | |
| "learning_rate": 4.811624746773616e-06, | |
| "loss": 0.1597, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.16973539812218022, | |
| "grad_norm": 0.9826930086582164, | |
| "learning_rate": 4.808538766760321e-06, | |
| "loss": 0.1539, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.17071088891598585, | |
| "grad_norm": 0.9622129610913185, | |
| "learning_rate": 4.805428720362054e-06, | |
| "loss": 0.1682, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.17168637970979148, | |
| "grad_norm": 1.200349435662473, | |
| "learning_rate": 4.8022946400008705e-06, | |
| "loss": 0.1687, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.17266187050359713, | |
| "grad_norm": 1.0757087822872222, | |
| "learning_rate": 4.79913655834938e-06, | |
| "loss": 0.1812, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.17363736129740276, | |
| "grad_norm": 1.0368299327714814, | |
| "learning_rate": 4.795954508330403e-06, | |
| "loss": 0.1721, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.1746128520912084, | |
| "grad_norm": 1.1790584459327953, | |
| "learning_rate": 4.79274852311663e-06, | |
| "loss": 0.1708, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.17558834288501401, | |
| "grad_norm": 0.9846847616375829, | |
| "learning_rate": 4.7895186361302736e-06, | |
| "loss": 0.1639, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.17656383367881964, | |
| "grad_norm": 0.9524671440100206, | |
| "learning_rate": 4.786264881042722e-06, | |
| "loss": 0.1544, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.1775393244726253, | |
| "grad_norm": 1.0337867693599667, | |
| "learning_rate": 4.782987291774186e-06, | |
| "loss": 0.1586, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.17851481526643093, | |
| "grad_norm": 0.955523189772594, | |
| "learning_rate": 4.779685902493346e-06, | |
| "loss": 0.1703, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.17949030606023655, | |
| "grad_norm": 0.8949175563809436, | |
| "learning_rate": 4.776360747616999e-06, | |
| "loss": 0.1536, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.18046579685404218, | |
| "grad_norm": 1.0812607035187014, | |
| "learning_rate": 4.773011861809694e-06, | |
| "loss": 0.1729, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1814412876478478, | |
| "grad_norm": 0.9776277191751103, | |
| "learning_rate": 4.769639279983372e-06, | |
| "loss": 0.1745, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.18241677844165347, | |
| "grad_norm": 1.0274757082533237, | |
| "learning_rate": 4.76624303729701e-06, | |
| "loss": 0.1651, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.1833922692354591, | |
| "grad_norm": 1.0244957983233014, | |
| "learning_rate": 4.762823169156242e-06, | |
| "loss": 0.1642, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.18436776002926472, | |
| "grad_norm": 0.9535466834456596, | |
| "learning_rate": 4.7593797112129995e-06, | |
| "loss": 0.1701, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.18534325082307035, | |
| "grad_norm": 1.0223048067541962, | |
| "learning_rate": 4.755912699365135e-06, | |
| "loss": 0.1619, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.18631874161687598, | |
| "grad_norm": 0.9985286606007027, | |
| "learning_rate": 4.752422169756048e-06, | |
| "loss": 0.1541, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.18729423241068163, | |
| "grad_norm": 0.9550222906892099, | |
| "learning_rate": 4.748908158774312e-06, | |
| "loss": 0.1583, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.18826972320448726, | |
| "grad_norm": 1.0220426764043464, | |
| "learning_rate": 4.745370703053291e-06, | |
| "loss": 0.1685, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.1892452139982929, | |
| "grad_norm": 0.9903476130292974, | |
| "learning_rate": 4.741809839470758e-06, | |
| "loss": 0.1624, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.19022070479209852, | |
| "grad_norm": 0.9340244675396043, | |
| "learning_rate": 4.738225605148514e-06, | |
| "loss": 0.171, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.19119619558590417, | |
| "grad_norm": 0.9933142717509476, | |
| "learning_rate": 4.734618037451997e-06, | |
| "loss": 0.1677, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.1921716863797098, | |
| "grad_norm": 1.1481162666912414, | |
| "learning_rate": 4.730987173989894e-06, | |
| "loss": 0.1654, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.19314717717351543, | |
| "grad_norm": 1.0010562488790584, | |
| "learning_rate": 4.727333052613749e-06, | |
| "loss": 0.1689, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.19412266796732106, | |
| "grad_norm": 0.943204986199299, | |
| "learning_rate": 4.7236557114175705e-06, | |
| "loss": 0.153, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.19509815876112668, | |
| "grad_norm": 1.004212156065797, | |
| "learning_rate": 4.71995518873743e-06, | |
| "loss": 0.1738, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.19607364955493234, | |
| "grad_norm": 0.9943567195079659, | |
| "learning_rate": 4.716231523151065e-06, | |
| "loss": 0.1621, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.19704914034873797, | |
| "grad_norm": 0.9438877027311198, | |
| "learning_rate": 4.712484753477478e-06, | |
| "loss": 0.1608, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.1980246311425436, | |
| "grad_norm": 1.0575615752386678, | |
| "learning_rate": 4.708714918776527e-06, | |
| "loss": 0.1712, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.19900012193634922, | |
| "grad_norm": 0.9584987238953627, | |
| "learning_rate": 4.704922058348526e-06, | |
| "loss": 0.1586, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.19997561273015485, | |
| "grad_norm": 0.9822024229285666, | |
| "learning_rate": 4.701106211733827e-06, | |
| "loss": 0.1633, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2009511035239605, | |
| "grad_norm": 1.0567476044736286, | |
| "learning_rate": 4.697267418712415e-06, | |
| "loss": 0.1682, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.20192659431776613, | |
| "grad_norm": 0.9248879785588529, | |
| "learning_rate": 4.693405719303487e-06, | |
| "loss": 0.158, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.20290208511157176, | |
| "grad_norm": 1.0157347860608708, | |
| "learning_rate": 4.689521153765039e-06, | |
| "loss": 0.1585, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2038775759053774, | |
| "grad_norm": 1.0718965104259908, | |
| "learning_rate": 4.685613762593446e-06, | |
| "loss": 0.1664, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.20485306669918302, | |
| "grad_norm": 1.1039750845125622, | |
| "learning_rate": 4.681683586523037e-06, | |
| "loss": 0.1678, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.20582855749298867, | |
| "grad_norm": 1.0549278846097074, | |
| "learning_rate": 4.677730666525675e-06, | |
| "loss": 0.1661, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.2068040482867943, | |
| "grad_norm": 1.0350210106238715, | |
| "learning_rate": 4.673755043810324e-06, | |
| "loss": 0.1629, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.20777953908059993, | |
| "grad_norm": 1.0928587210817202, | |
| "learning_rate": 4.669756759822625e-06, | |
| "loss": 0.1641, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.20875502987440556, | |
| "grad_norm": 1.0139520854082134, | |
| "learning_rate": 4.66573585624446e-06, | |
| "loss": 0.1689, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.20973052066821118, | |
| "grad_norm": 1.1017739109840923, | |
| "learning_rate": 4.661692374993519e-06, | |
| "loss": 0.1624, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.21070601146201684, | |
| "grad_norm": 1.2340168628656984, | |
| "learning_rate": 4.657626358222864e-06, | |
| "loss": 0.1756, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.21168150225582247, | |
| "grad_norm": 0.9230324919898438, | |
| "learning_rate": 4.653537848320488e-06, | |
| "loss": 0.1602, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2126569930496281, | |
| "grad_norm": 1.0516263439718125, | |
| "learning_rate": 4.6494268879088745e-06, | |
| "loss": 0.161, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.21363248384343372, | |
| "grad_norm": 1.0294336700433153, | |
| "learning_rate": 4.6452935198445496e-06, | |
| "loss": 0.169, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.21460797463723935, | |
| "grad_norm": 0.950242561108817, | |
| "learning_rate": 4.64113778721764e-06, | |
| "loss": 0.1686, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.215583465431045, | |
| "grad_norm": 1.0972582723890745, | |
| "learning_rate": 4.636959733351422e-06, | |
| "loss": 0.1581, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.21655895622485064, | |
| "grad_norm": 0.9521794002072761, | |
| "learning_rate": 4.632759401801869e-06, | |
| "loss": 0.1592, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.21753444701865626, | |
| "grad_norm": 1.1612379311952095, | |
| "learning_rate": 4.628536836357196e-06, | |
| "loss": 0.166, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.2185099378124619, | |
| "grad_norm": 0.9211219216002743, | |
| "learning_rate": 4.6242920810374095e-06, | |
| "loss": 0.1542, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.21948542860626752, | |
| "grad_norm": 0.937788934386043, | |
| "learning_rate": 4.62002518009384e-06, | |
| "loss": 0.1725, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.22046091940007317, | |
| "grad_norm": 0.9773446856643697, | |
| "learning_rate": 4.615736178008687e-06, | |
| "loss": 0.1566, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2214364101938788, | |
| "grad_norm": 0.9516125327713063, | |
| "learning_rate": 4.611425119494552e-06, | |
| "loss": 0.158, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.22241190098768443, | |
| "grad_norm": 1.0157719146604811, | |
| "learning_rate": 4.607092049493973e-06, | |
| "loss": 0.1633, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.22338739178149006, | |
| "grad_norm": 0.9692342820358121, | |
| "learning_rate": 4.6027370131789614e-06, | |
| "loss": 0.1622, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.22436288257529569, | |
| "grad_norm": 0.9487979994417061, | |
| "learning_rate": 4.5983600559505196e-06, | |
| "loss": 0.1596, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.22533837336910134, | |
| "grad_norm": 1.0470848627284035, | |
| "learning_rate": 4.593961223438177e-06, | |
| "loss": 0.1707, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.22631386416290697, | |
| "grad_norm": 0.9284433450109106, | |
| "learning_rate": 4.589540561499512e-06, | |
| "loss": 0.149, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2272893549567126, | |
| "grad_norm": 1.0411956514134435, | |
| "learning_rate": 4.585098116219674e-06, | |
| "loss": 0.1666, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.22826484575051822, | |
| "grad_norm": 0.9982980604258467, | |
| "learning_rate": 4.580633933910901e-06, | |
| "loss": 0.1582, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.22924033654432385, | |
| "grad_norm": 1.0353148821593277, | |
| "learning_rate": 4.576148061112039e-06, | |
| "loss": 0.1798, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2302158273381295, | |
| "grad_norm": 0.8623535833361177, | |
| "learning_rate": 4.571640544588056e-06, | |
| "loss": 0.1568, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.23119131813193514, | |
| "grad_norm": 1.141933497627487, | |
| "learning_rate": 4.567111431329555e-06, | |
| "loss": 0.1556, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.23216680892574076, | |
| "grad_norm": 0.9999053398774652, | |
| "learning_rate": 4.562560768552283e-06, | |
| "loss": 0.1603, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.2331422997195464, | |
| "grad_norm": 0.9284632452687369, | |
| "learning_rate": 4.55798860369664e-06, | |
| "loss": 0.1458, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.23411779051335202, | |
| "grad_norm": 0.9872863479156412, | |
| "learning_rate": 4.553394984427184e-06, | |
| "loss": 0.1621, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.23509328130715768, | |
| "grad_norm": 0.9100468729196777, | |
| "learning_rate": 4.548779958632134e-06, | |
| "loss": 0.1568, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2360687721009633, | |
| "grad_norm": 0.8444568550070792, | |
| "learning_rate": 4.54414357442287e-06, | |
| "loss": 0.1408, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.23704426289476893, | |
| "grad_norm": 1.0223092529639413, | |
| "learning_rate": 4.539485880133433e-06, | |
| "loss": 0.1609, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.23801975368857456, | |
| "grad_norm": 0.9414329883963978, | |
| "learning_rate": 4.534806924320021e-06, | |
| "loss": 0.1528, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2389952444823802, | |
| "grad_norm": 1.0175078568206852, | |
| "learning_rate": 4.5301067557604815e-06, | |
| "loss": 0.1607, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.23997073527618584, | |
| "grad_norm": 0.9503464111258508, | |
| "learning_rate": 4.525385423453803e-06, | |
| "loss": 0.1621, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.24094622606999147, | |
| "grad_norm": 1.0317509367040238, | |
| "learning_rate": 4.520642976619607e-06, | |
| "loss": 0.1562, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.2419217168637971, | |
| "grad_norm": 0.9406630214637611, | |
| "learning_rate": 4.515879464697629e-06, | |
| "loss": 0.1524, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.24289720765760273, | |
| "grad_norm": 0.9116204738820602, | |
| "learning_rate": 4.51109493734721e-06, | |
| "loss": 0.1589, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.24387269845140835, | |
| "grad_norm": 0.9686707124955454, | |
| "learning_rate": 4.506289444446775e-06, | |
| "loss": 0.1661, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.244848189245214, | |
| "grad_norm": 0.8855697170450265, | |
| "learning_rate": 4.5014630360933136e-06, | |
| "loss": 0.1525, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.24582368003901964, | |
| "grad_norm": 0.8408850368006234, | |
| "learning_rate": 4.496615762601857e-06, | |
| "loss": 0.1553, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.24679917083282527, | |
| "grad_norm": 0.9791331885043342, | |
| "learning_rate": 4.491747674504956e-06, | |
| "loss": 0.1613, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.2477746616266309, | |
| "grad_norm": 0.8536640710320617, | |
| "learning_rate": 4.48685882255215e-06, | |
| "loss": 0.1561, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.24875015242043652, | |
| "grad_norm": 0.9108673457445514, | |
| "learning_rate": 4.481949257709442e-06, | |
| "loss": 0.1581, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.24972564321424218, | |
| "grad_norm": 0.9929310498573025, | |
| "learning_rate": 4.477019031158767e-06, | |
| "loss": 0.1635, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.2507011340080478, | |
| "grad_norm": 0.9319735021715781, | |
| "learning_rate": 4.472068194297453e-06, | |
| "loss": 0.1535, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.25167662480185343, | |
| "grad_norm": 0.8234410520072005, | |
| "learning_rate": 4.467096798737694e-06, | |
| "loss": 0.1419, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2526521155956591, | |
| "grad_norm": 0.8293157655059741, | |
| "learning_rate": 4.462104896306004e-06, | |
| "loss": 0.1509, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.2536276063894647, | |
| "grad_norm": 0.981516914054671, | |
| "learning_rate": 4.457092539042682e-06, | |
| "loss": 0.1503, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.25460309718327034, | |
| "grad_norm": 0.9280314009979089, | |
| "learning_rate": 4.452059779201267e-06, | |
| "loss": 0.1524, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.25557858797707594, | |
| "grad_norm": 0.9107843402176423, | |
| "learning_rate": 4.44700666924799e-06, | |
| "loss": 0.1599, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.2565540787708816, | |
| "grad_norm": 0.9504200684566895, | |
| "learning_rate": 4.441933261861239e-06, | |
| "loss": 0.1603, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.25752956956468726, | |
| "grad_norm": 0.8932484399394331, | |
| "learning_rate": 4.436839609930992e-06, | |
| "loss": 0.166, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.25850506035849286, | |
| "grad_norm": 0.9657424201285992, | |
| "learning_rate": 4.431725766558284e-06, | |
| "loss": 0.1592, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2594805511522985, | |
| "grad_norm": 0.9280800047846618, | |
| "learning_rate": 4.426591785054637e-06, | |
| "loss": 0.1692, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2604560419461041, | |
| "grad_norm": 0.909753774216958, | |
| "learning_rate": 4.421437718941517e-06, | |
| "loss": 0.1546, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.26143153273990977, | |
| "grad_norm": 0.9249375767292966, | |
| "learning_rate": 4.416263621949769e-06, | |
| "loss": 0.158, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.2624070235337154, | |
| "grad_norm": 0.9638546484727502, | |
| "learning_rate": 4.41106954801906e-06, | |
| "loss": 0.1549, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.263382514327521, | |
| "grad_norm": 0.9676726584506395, | |
| "learning_rate": 4.4058555512973135e-06, | |
| "loss": 0.1526, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2643580051213267, | |
| "grad_norm": 0.9643850490217997, | |
| "learning_rate": 4.4006216861401475e-06, | |
| "loss": 0.1507, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.2653334959151323, | |
| "grad_norm": 0.9455841602811957, | |
| "learning_rate": 4.395368007110307e-06, | |
| "loss": 0.1609, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.26630898670893793, | |
| "grad_norm": 1.0032153539367255, | |
| "learning_rate": 4.390094568977099e-06, | |
| "loss": 0.1517, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.2672844775027436, | |
| "grad_norm": 0.9286676590827331, | |
| "learning_rate": 4.384801426715814e-06, | |
| "loss": 0.1509, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2682599682965492, | |
| "grad_norm": 0.8635512942768435, | |
| "learning_rate": 4.379488635507157e-06, | |
| "loss": 0.1479, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.26923545909035485, | |
| "grad_norm": 0.9730864421049288, | |
| "learning_rate": 4.3741562507366754e-06, | |
| "loss": 0.1532, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.27021094988416045, | |
| "grad_norm": 0.8738885156811014, | |
| "learning_rate": 4.368804327994174e-06, | |
| "loss": 0.1579, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.2711864406779661, | |
| "grad_norm": 0.9907365851233481, | |
| "learning_rate": 4.363432923073144e-06, | |
| "loss": 0.1543, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.27216193147177176, | |
| "grad_norm": 0.9242295443836225, | |
| "learning_rate": 4.3580420919701745e-06, | |
| "loss": 0.1584, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.27313742226557736, | |
| "grad_norm": 0.9088209928537309, | |
| "learning_rate": 4.352631890884373e-06, | |
| "loss": 0.1547, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.274112913059383, | |
| "grad_norm": 0.9101013673566347, | |
| "learning_rate": 4.347202376216775e-06, | |
| "loss": 0.1648, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.2750884038531886, | |
| "grad_norm": 0.9053322800776012, | |
| "learning_rate": 4.341753604569764e-06, | |
| "loss": 0.1488, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.27606389464699427, | |
| "grad_norm": 0.9622509070577334, | |
| "learning_rate": 4.336285632746472e-06, | |
| "loss": 0.155, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.2770393854407999, | |
| "grad_norm": 0.9097255445557655, | |
| "learning_rate": 4.330798517750194e-06, | |
| "loss": 0.138, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.2780148762346055, | |
| "grad_norm": 0.915630210467682, | |
| "learning_rate": 4.3252923167837905e-06, | |
| "loss": 0.1533, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.2789903670284112, | |
| "grad_norm": 0.9410519479526743, | |
| "learning_rate": 4.319767087249094e-06, | |
| "loss": 0.1588, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.2799658578222168, | |
| "grad_norm": 0.9225807794360538, | |
| "learning_rate": 4.314222886746304e-06, | |
| "loss": 0.1546, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.28094134861602243, | |
| "grad_norm": 0.8732635665891079, | |
| "learning_rate": 4.308659773073398e-06, | |
| "loss": 0.1677, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.2819168394098281, | |
| "grad_norm": 0.979628005682833, | |
| "learning_rate": 4.303077804225517e-06, | |
| "loss": 0.1626, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.2828923302036337, | |
| "grad_norm": 0.9055375259430347, | |
| "learning_rate": 4.297477038394368e-06, | |
| "loss": 0.1675, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.28386782099743935, | |
| "grad_norm": 0.9480989580020738, | |
| "learning_rate": 4.291857533967616e-06, | |
| "loss": 0.1574, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.28484331179124495, | |
| "grad_norm": 0.8985804614382488, | |
| "learning_rate": 4.286219349528274e-06, | |
| "loss": 0.1557, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.2858188025850506, | |
| "grad_norm": 1.0146034373434893, | |
| "learning_rate": 4.280562543854091e-06, | |
| "loss": 0.1645, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.28679429337885626, | |
| "grad_norm": 0.890713161023523, | |
| "learning_rate": 4.274887175916946e-06, | |
| "loss": 0.1567, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.28776978417266186, | |
| "grad_norm": 0.9267996009148974, | |
| "learning_rate": 4.269193304882226e-06, | |
| "loss": 0.16, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.2887452749664675, | |
| "grad_norm": 0.8938807739528849, | |
| "learning_rate": 4.263480990108212e-06, | |
| "loss": 0.1578, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.2897207657602731, | |
| "grad_norm": 0.9382622618633856, | |
| "learning_rate": 4.257750291145457e-06, | |
| "loss": 0.1594, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.29069625655407877, | |
| "grad_norm": 0.9825847935980099, | |
| "learning_rate": 4.252001267736174e-06, | |
| "loss": 0.1555, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.2916717473478844, | |
| "grad_norm": 0.8990364135002812, | |
| "learning_rate": 4.246233979813602e-06, | |
| "loss": 0.1603, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.29264723814169, | |
| "grad_norm": 0.8656451974006956, | |
| "learning_rate": 4.24044848750139e-06, | |
| "loss": 0.1491, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2936227289354957, | |
| "grad_norm": 0.9253142530395264, | |
| "learning_rate": 4.234644851112965e-06, | |
| "loss": 0.1607, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.2945982197293013, | |
| "grad_norm": 0.8969951216981602, | |
| "learning_rate": 4.228823131150904e-06, | |
| "loss": 0.1521, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.29557371052310694, | |
| "grad_norm": 0.9403876224279274, | |
| "learning_rate": 4.222983388306308e-06, | |
| "loss": 0.1616, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.2965492013169126, | |
| "grad_norm": 0.8925996666326684, | |
| "learning_rate": 4.217125683458162e-06, | |
| "loss": 0.1581, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.2975246921107182, | |
| "grad_norm": 0.8585766705268447, | |
| "learning_rate": 4.211250077672704e-06, | |
| "loss": 0.1492, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.29850018290452385, | |
| "grad_norm": 0.9350703216606489, | |
| "learning_rate": 4.20535663220279e-06, | |
| "loss": 0.1587, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.29947567369832945, | |
| "grad_norm": 0.9188676922420881, | |
| "learning_rate": 4.199445408487253e-06, | |
| "loss": 0.1593, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.3004511644921351, | |
| "grad_norm": 0.9547113904108987, | |
| "learning_rate": 4.1935164681502626e-06, | |
| "loss": 0.151, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.30142665528594076, | |
| "grad_norm": 1.0033584349157778, | |
| "learning_rate": 4.187569873000684e-06, | |
| "loss": 0.1629, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.30240214607974636, | |
| "grad_norm": 0.9859571353144448, | |
| "learning_rate": 4.181605685031433e-06, | |
| "loss": 0.1734, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.303377636873552, | |
| "grad_norm": 0.8228370416849764, | |
| "learning_rate": 4.1756239664188275e-06, | |
| "loss": 0.1509, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.3043531276673576, | |
| "grad_norm": 1.0367963117038492, | |
| "learning_rate": 4.169624779521944e-06, | |
| "loss": 0.1571, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.30532861846116327, | |
| "grad_norm": 0.9669617210813681, | |
| "learning_rate": 4.163608186881964e-06, | |
| "loss": 0.1628, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3063041092549689, | |
| "grad_norm": 1.0103654228797525, | |
| "learning_rate": 4.157574251221522e-06, | |
| "loss": 0.1595, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3072796000487745, | |
| "grad_norm": 0.9081265589361568, | |
| "learning_rate": 4.1515230354440526e-06, | |
| "loss": 0.1524, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3082550908425802, | |
| "grad_norm": 0.924809859907981, | |
| "learning_rate": 4.145454602633137e-06, | |
| "loss": 0.1619, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3092305816363858, | |
| "grad_norm": 0.8914505960487598, | |
| "learning_rate": 4.139369016051838e-06, | |
| "loss": 0.1603, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.31020607243019144, | |
| "grad_norm": 0.8961623985939995, | |
| "learning_rate": 4.1332663391420515e-06, | |
| "loss": 0.1604, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3111815632239971, | |
| "grad_norm": 0.8168107498311156, | |
| "learning_rate": 4.127146635523835e-06, | |
| "loss": 0.1437, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.3121570540178027, | |
| "grad_norm": 0.8664110012815645, | |
| "learning_rate": 4.121009968994751e-06, | |
| "loss": 0.1506, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.31313254481160835, | |
| "grad_norm": 0.9602948973971756, | |
| "learning_rate": 4.114856403529196e-06, | |
| "loss": 0.1463, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.31410803560541395, | |
| "grad_norm": 0.9778713250873332, | |
| "learning_rate": 4.10868600327774e-06, | |
| "loss": 0.1615, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3150835263992196, | |
| "grad_norm": 1.0278280133622393, | |
| "learning_rate": 4.102498832566454e-06, | |
| "loss": 0.1537, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.31605901719302526, | |
| "grad_norm": 0.9256019026472626, | |
| "learning_rate": 4.096294955896239e-06, | |
| "loss": 0.1611, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.31703450798683086, | |
| "grad_norm": 0.8287537359495541, | |
| "learning_rate": 4.090074437942155e-06, | |
| "loss": 0.1533, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.3180099987806365, | |
| "grad_norm": 0.8835278034235962, | |
| "learning_rate": 4.083837343552749e-06, | |
| "loss": 0.1493, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.3189854895744421, | |
| "grad_norm": 0.8259069804781106, | |
| "learning_rate": 4.077583737749373e-06, | |
| "loss": 0.1424, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.31996098036824777, | |
| "grad_norm": 0.941189764723523, | |
| "learning_rate": 4.07131368572551e-06, | |
| "loss": 0.1523, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.3209364711620534, | |
| "grad_norm": 0.9164961397020092, | |
| "learning_rate": 4.0650272528460955e-06, | |
| "loss": 0.1539, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.321911961955859, | |
| "grad_norm": 0.8541305146982892, | |
| "learning_rate": 4.058724504646834e-06, | |
| "loss": 0.1377, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3228874527496647, | |
| "grad_norm": 0.9562169457452313, | |
| "learning_rate": 4.052405506833516e-06, | |
| "loss": 0.1534, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3238629435434703, | |
| "grad_norm": 0.9199148267026572, | |
| "learning_rate": 4.046070325281333e-06, | |
| "loss": 0.1471, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.32483843433727594, | |
| "grad_norm": 0.9205784248133079, | |
| "learning_rate": 4.039719026034191e-06, | |
| "loss": 0.1403, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.3258139251310816, | |
| "grad_norm": 0.8621247018342253, | |
| "learning_rate": 4.0333516753040225e-06, | |
| "loss": 0.1365, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.3267894159248872, | |
| "grad_norm": 0.9318993173542444, | |
| "learning_rate": 4.026968339470097e-06, | |
| "loss": 0.1446, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.32776490671869285, | |
| "grad_norm": 0.9970504184721904, | |
| "learning_rate": 4.020569085078324e-06, | |
| "loss": 0.1553, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.32874039751249845, | |
| "grad_norm": 0.9288139122169886, | |
| "learning_rate": 4.014153978840568e-06, | |
| "loss": 0.1457, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.3297158883063041, | |
| "grad_norm": 0.947326368514792, | |
| "learning_rate": 4.007723087633943e-06, | |
| "loss": 0.1567, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.33069137910010976, | |
| "grad_norm": 0.933169285653779, | |
| "learning_rate": 4.001276478500127e-06, | |
| "loss": 0.1564, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.33166686989391536, | |
| "grad_norm": 0.8619195635693533, | |
| "learning_rate": 3.994814218644649e-06, | |
| "loss": 0.1409, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.332642360687721, | |
| "grad_norm": 0.9533171506313436, | |
| "learning_rate": 3.988336375436201e-06, | |
| "loss": 0.1472, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.3336178514815266, | |
| "grad_norm": 0.9085852674744761, | |
| "learning_rate": 3.981843016405928e-06, | |
| "loss": 0.1518, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3345933422753323, | |
| "grad_norm": 0.8015484531793845, | |
| "learning_rate": 3.975334209246727e-06, | |
| "loss": 0.1448, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.33556883306913793, | |
| "grad_norm": 0.9409823279258879, | |
| "learning_rate": 3.968810021812544e-06, | |
| "loss": 0.1555, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.33654432386294353, | |
| "grad_norm": 0.8142592274829353, | |
| "learning_rate": 3.962270522117659e-06, | |
| "loss": 0.1457, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3375198146567492, | |
| "grad_norm": 0.9076632142318111, | |
| "learning_rate": 3.955715778335984e-06, | |
| "loss": 0.1538, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.3384953054505548, | |
| "grad_norm": 0.8974350187857667, | |
| "learning_rate": 3.949145858800348e-06, | |
| "loss": 0.1481, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.33947079624436044, | |
| "grad_norm": 0.8901297893092709, | |
| "learning_rate": 3.942560832001789e-06, | |
| "loss": 0.1503, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.3404462870381661, | |
| "grad_norm": 0.8560837002365613, | |
| "learning_rate": 3.935960766588835e-06, | |
| "loss": 0.1528, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.3414217778319717, | |
| "grad_norm": 0.8443274184309183, | |
| "learning_rate": 3.9293457313667905e-06, | |
| "loss": 0.1436, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.34239726862577735, | |
| "grad_norm": 1.0282756436782237, | |
| "learning_rate": 3.922715795297022e-06, | |
| "loss": 0.1482, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.34337275941958295, | |
| "grad_norm": 0.9065229091333961, | |
| "learning_rate": 3.916071027496234e-06, | |
| "loss": 0.1445, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.3443482502133886, | |
| "grad_norm": 0.9714447847850254, | |
| "learning_rate": 3.909411497235752e-06, | |
| "loss": 0.1473, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.34532374100719426, | |
| "grad_norm": 0.9561585485417536, | |
| "learning_rate": 3.902737273940799e-06, | |
| "loss": 0.1471, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.34629923180099986, | |
| "grad_norm": 0.8900543390831095, | |
| "learning_rate": 3.8960484271897736e-06, | |
| "loss": 0.1527, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3472747225948055, | |
| "grad_norm": 0.9827409295726128, | |
| "learning_rate": 3.889345026713519e-06, | |
| "loss": 0.1534, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3482502133886111, | |
| "grad_norm": 0.8940002734667856, | |
| "learning_rate": 3.882627142394605e-06, | |
| "loss": 0.1486, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.3492257041824168, | |
| "grad_norm": 0.9944296349312072, | |
| "learning_rate": 3.875894844266592e-06, | |
| "loss": 0.1531, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.35020119497622243, | |
| "grad_norm": 0.9120571509586781, | |
| "learning_rate": 3.869148202513303e-06, | |
| "loss": 0.1606, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.35117668577002803, | |
| "grad_norm": 0.9095948987953094, | |
| "learning_rate": 3.862387287468095e-06, | |
| "loss": 0.137, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3521521765638337, | |
| "grad_norm": 0.877930473526069, | |
| "learning_rate": 3.85561216961312e-06, | |
| "loss": 0.1408, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.3531276673576393, | |
| "grad_norm": 0.9699360794407749, | |
| "learning_rate": 3.8488229195785965e-06, | |
| "loss": 0.147, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.35410315815144494, | |
| "grad_norm": 0.8874366855608934, | |
| "learning_rate": 3.842019608142068e-06, | |
| "loss": 0.1446, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.3550786489452506, | |
| "grad_norm": 0.8644114874928793, | |
| "learning_rate": 3.83520230622767e-06, | |
| "loss": 0.1484, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.3560541397390562, | |
| "grad_norm": 0.9393508952046424, | |
| "learning_rate": 3.8283710849053835e-06, | |
| "loss": 0.1415, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.35702963053286185, | |
| "grad_norm": 0.915863410960202, | |
| "learning_rate": 3.821526015390302e-06, | |
| "loss": 0.1386, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.35800512132666745, | |
| "grad_norm": 0.9939022499746282, | |
| "learning_rate": 3.814667169041887e-06, | |
| "loss": 0.156, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.3589806121204731, | |
| "grad_norm": 0.9698635070361811, | |
| "learning_rate": 3.8077946173632175e-06, | |
| "loss": 0.1351, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.35995610291427876, | |
| "grad_norm": 0.9476385232414736, | |
| "learning_rate": 3.800908432000254e-06, | |
| "loss": 0.1427, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.36093159370808436, | |
| "grad_norm": 0.856130584200129, | |
| "learning_rate": 3.7940086847410875e-06, | |
| "loss": 0.1516, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.36190708450189, | |
| "grad_norm": 0.8579511362373262, | |
| "learning_rate": 3.7870954475151873e-06, | |
| "loss": 0.1453, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.3628825752956956, | |
| "grad_norm": 0.9244210071492551, | |
| "learning_rate": 3.780168792392658e-06, | |
| "loss": 0.1403, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.3638580660895013, | |
| "grad_norm": 0.8916214840343722, | |
| "learning_rate": 3.7732287915834842e-06, | |
| "loss": 0.1344, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.36483355688330693, | |
| "grad_norm": 0.9285987965737049, | |
| "learning_rate": 3.766275517436779e-06, | |
| "loss": 0.1473, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.36580904767711253, | |
| "grad_norm": 1.0202253515003419, | |
| "learning_rate": 3.759309042440028e-06, | |
| "loss": 0.1626, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.3667845384709182, | |
| "grad_norm": 0.9202482904809027, | |
| "learning_rate": 3.752329439218337e-06, | |
| "loss": 0.1437, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.3677600292647238, | |
| "grad_norm": 0.868921559794134, | |
| "learning_rate": 3.7453367805336697e-06, | |
| "loss": 0.1411, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.36873552005852944, | |
| "grad_norm": 0.9278377375229888, | |
| "learning_rate": 3.738331139284096e-06, | |
| "loss": 0.1462, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.3697110108523351, | |
| "grad_norm": 0.9555741240850121, | |
| "learning_rate": 3.731312588503024e-06, | |
| "loss": 0.1525, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.3706865016461407, | |
| "grad_norm": 0.9746070816089245, | |
| "learning_rate": 3.7242812013584474e-06, | |
| "loss": 0.1509, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.37166199243994635, | |
| "grad_norm": 0.8847369605779092, | |
| "learning_rate": 3.717237051152175e-06, | |
| "loss": 0.1433, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.37263748323375195, | |
| "grad_norm": 1.0235063474249009, | |
| "learning_rate": 3.710180211319071e-06, | |
| "loss": 0.1486, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.3736129740275576, | |
| "grad_norm": 0.8950016936249444, | |
| "learning_rate": 3.703110755426289e-06, | |
| "loss": 0.139, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.37458846482136327, | |
| "grad_norm": 0.8847042144920154, | |
| "learning_rate": 3.696028757172503e-06, | |
| "loss": 0.1521, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.37556395561516887, | |
| "grad_norm": 0.9106515631528226, | |
| "learning_rate": 3.68893429038714e-06, | |
| "loss": 0.146, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3765394464089745, | |
| "grad_norm": 0.9546055014669876, | |
| "learning_rate": 3.681827429029613e-06, | |
| "loss": 0.1431, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.3775149372027802, | |
| "grad_norm": 0.8713617926689637, | |
| "learning_rate": 3.6747082471885454e-06, | |
| "loss": 0.1484, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.3784904279965858, | |
| "grad_norm": 0.8981560581623195, | |
| "learning_rate": 3.6675768190810023e-06, | |
| "loss": 0.1405, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.37946591879039143, | |
| "grad_norm": 0.9576903681690667, | |
| "learning_rate": 3.6604332190517144e-06, | |
| "loss": 0.1461, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.38044140958419703, | |
| "grad_norm": 0.9982163299550169, | |
| "learning_rate": 3.653277521572305e-06, | |
| "loss": 0.15, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3814169003780027, | |
| "grad_norm": 0.8524151991534643, | |
| "learning_rate": 3.6461098012405116e-06, | |
| "loss": 0.1446, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.38239239117180834, | |
| "grad_norm": 0.9831471719181231, | |
| "learning_rate": 3.6389301327794114e-06, | |
| "loss": 0.1671, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.38336788196561394, | |
| "grad_norm": 0.9182177457148154, | |
| "learning_rate": 3.6317385910366365e-06, | |
| "loss": 0.1476, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.3843433727594196, | |
| "grad_norm": 0.9198423171501263, | |
| "learning_rate": 3.624535250983601e-06, | |
| "loss": 0.153, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.3853188635532252, | |
| "grad_norm": 0.8808600354881101, | |
| "learning_rate": 3.6173201877147134e-06, | |
| "loss": 0.1456, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.38629435434703085, | |
| "grad_norm": 0.8829324038702485, | |
| "learning_rate": 3.6100934764465973e-06, | |
| "loss": 0.1413, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.3872698451408365, | |
| "grad_norm": 0.8876955529556401, | |
| "learning_rate": 3.6028551925173046e-06, | |
| "loss": 0.1512, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.3882453359346421, | |
| "grad_norm": 0.8555692280523547, | |
| "learning_rate": 3.595605411385533e-06, | |
| "loss": 0.1428, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.38922082672844777, | |
| "grad_norm": 0.8764418318857389, | |
| "learning_rate": 3.5883442086298375e-06, | |
| "loss": 0.1481, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.39019631752225337, | |
| "grad_norm": 0.926181501103571, | |
| "learning_rate": 3.581071659947842e-06, | |
| "loss": 0.1394, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.391171808316059, | |
| "grad_norm": 0.9087710728115378, | |
| "learning_rate": 3.5737878411554515e-06, | |
| "loss": 0.1396, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.3921472991098647, | |
| "grad_norm": 0.9084832908918349, | |
| "learning_rate": 3.566492828186063e-06, | |
| "loss": 0.1457, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3931227899036703, | |
| "grad_norm": 0.9198265733188117, | |
| "learning_rate": 3.5591866970897695e-06, | |
| "loss": 0.1428, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.39409828069747593, | |
| "grad_norm": 0.9028014238279914, | |
| "learning_rate": 3.55186952403257e-06, | |
| "loss": 0.1459, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.39507377149128153, | |
| "grad_norm": 0.7803451616200923, | |
| "learning_rate": 3.5445413852955783e-06, | |
| "loss": 0.1405, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.3960492622850872, | |
| "grad_norm": 0.8807693627140049, | |
| "learning_rate": 3.5372023572742224e-06, | |
| "loss": 0.144, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.39702475307889284, | |
| "grad_norm": 0.9065679616664823, | |
| "learning_rate": 3.52985251647745e-06, | |
| "loss": 0.152, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.39800024387269844, | |
| "grad_norm": 0.9126785313651608, | |
| "learning_rate": 3.522491939526932e-06, | |
| "loss": 0.1527, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.3989757346665041, | |
| "grad_norm": 0.9401530910172149, | |
| "learning_rate": 3.515120703156264e-06, | |
| "loss": 0.1546, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.3999512254603097, | |
| "grad_norm": 0.919541385453652, | |
| "learning_rate": 3.507738884210164e-06, | |
| "loss": 0.1397, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.40092671625411536, | |
| "grad_norm": 0.9257840949340401, | |
| "learning_rate": 3.500346559643675e-06, | |
| "loss": 0.1435, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.401902207047921, | |
| "grad_norm": 0.9038785967347172, | |
| "learning_rate": 3.4929438065213568e-06, | |
| "loss": 0.147, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.4028776978417266, | |
| "grad_norm": 1.0676896384715988, | |
| "learning_rate": 3.48553070201649e-06, | |
| "loss": 0.1558, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.40385318863553227, | |
| "grad_norm": 0.9754468401487543, | |
| "learning_rate": 3.4781073234102665e-06, | |
| "loss": 0.1449, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.40482867942933787, | |
| "grad_norm": 0.8939601187260615, | |
| "learning_rate": 3.470673748090984e-06, | |
| "loss": 0.1396, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4058041702231435, | |
| "grad_norm": 0.9427428788545429, | |
| "learning_rate": 3.4632300535532415e-06, | |
| "loss": 0.1427, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.4067796610169492, | |
| "grad_norm": 0.887782687868998, | |
| "learning_rate": 3.4557763173971293e-06, | |
| "loss": 0.1449, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.4077551518107548, | |
| "grad_norm": 1.0018593992113438, | |
| "learning_rate": 3.4483126173274227e-06, | |
| "loss": 0.1473, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.40873064260456043, | |
| "grad_norm": 0.9539144339332059, | |
| "learning_rate": 3.440839031152769e-06, | |
| "loss": 0.1432, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.40970613339836603, | |
| "grad_norm": 0.8796520440270823, | |
| "learning_rate": 3.4333556367848773e-06, | |
| "loss": 0.1356, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4106816241921717, | |
| "grad_norm": 0.8930556857980667, | |
| "learning_rate": 3.425862512237708e-06, | |
| "loss": 0.1414, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.41165711498597735, | |
| "grad_norm": 0.9130386801180627, | |
| "learning_rate": 3.418359735626656e-06, | |
| "loss": 0.144, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.41263260577978295, | |
| "grad_norm": 0.8591156628779346, | |
| "learning_rate": 3.4108473851677408e-06, | |
| "loss": 0.1413, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.4136080965735886, | |
| "grad_norm": 0.9654725301304397, | |
| "learning_rate": 3.4033255391767865e-06, | |
| "loss": 0.1492, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.4145835873673942, | |
| "grad_norm": 0.9083019018961785, | |
| "learning_rate": 3.395794276068609e-06, | |
| "loss": 0.1452, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.41555907816119986, | |
| "grad_norm": 0.8891185711032482, | |
| "learning_rate": 3.388253674356196e-06, | |
| "loss": 0.1415, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4165345689550055, | |
| "grad_norm": 0.9375975455278247, | |
| "learning_rate": 3.380703812649891e-06, | |
| "loss": 0.1403, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.4175100597488111, | |
| "grad_norm": 0.9020491738743014, | |
| "learning_rate": 3.3731447696565713e-06, | |
| "loss": 0.1455, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.41848555054261677, | |
| "grad_norm": 0.8574973373936586, | |
| "learning_rate": 3.3655766241788285e-06, | |
| "loss": 0.1368, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.41946104133642237, | |
| "grad_norm": 0.8757023776531063, | |
| "learning_rate": 3.357999455114148e-06, | |
| "loss": 0.1446, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.420436532130228, | |
| "grad_norm": 0.9837347529941712, | |
| "learning_rate": 3.350413341454086e-06, | |
| "loss": 0.1506, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.4214120229240337, | |
| "grad_norm": 0.8472380588550584, | |
| "learning_rate": 3.3428183622834442e-06, | |
| "loss": 0.1513, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.4223875137178393, | |
| "grad_norm": 0.8387818280951195, | |
| "learning_rate": 3.3352145967794463e-06, | |
| "loss": 0.1403, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.42336300451164494, | |
| "grad_norm": 0.8972955470900738, | |
| "learning_rate": 3.3276021242109152e-06, | |
| "loss": 0.1411, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.42433849530545054, | |
| "grad_norm": 0.9676494460221445, | |
| "learning_rate": 3.319981023937442e-06, | |
| "loss": 0.1508, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4253139860992562, | |
| "grad_norm": 0.8603463763707745, | |
| "learning_rate": 3.3123513754085635e-06, | |
| "loss": 0.1412, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.42628947689306185, | |
| "grad_norm": 0.8735162753932564, | |
| "learning_rate": 3.3047132581629297e-06, | |
| "loss": 0.1356, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.42726496768686745, | |
| "grad_norm": 0.946772891936297, | |
| "learning_rate": 3.297066751827478e-06, | |
| "loss": 0.1364, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.4282404584806731, | |
| "grad_norm": 0.9312293341123083, | |
| "learning_rate": 3.2894119361166e-06, | |
| "loss": 0.1384, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.4292159492744787, | |
| "grad_norm": 0.8668697057456637, | |
| "learning_rate": 3.2817488908313132e-06, | |
| "loss": 0.1407, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.43019144006828436, | |
| "grad_norm": 0.9062795443546382, | |
| "learning_rate": 3.274077695858428e-06, | |
| "loss": 0.1511, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.43116693086209, | |
| "grad_norm": 0.9197029171414413, | |
| "learning_rate": 3.2663984311697137e-06, | |
| "loss": 0.1443, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.4321424216558956, | |
| "grad_norm": 0.9874318993389217, | |
| "learning_rate": 3.2587111768210677e-06, | |
| "loss": 0.1522, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.43311791244970127, | |
| "grad_norm": 0.8997910308707128, | |
| "learning_rate": 3.251016012951678e-06, | |
| "loss": 0.1376, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.43409340324350687, | |
| "grad_norm": 0.8780597656452647, | |
| "learning_rate": 3.2433130197831877e-06, | |
| "loss": 0.141, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.4350688940373125, | |
| "grad_norm": 0.8943843664972011, | |
| "learning_rate": 3.2356022776188623e-06, | |
| "loss": 0.1394, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.4360443848311182, | |
| "grad_norm": 0.9140727392888497, | |
| "learning_rate": 3.227883866842749e-06, | |
| "loss": 0.1384, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.4370198756249238, | |
| "grad_norm": 0.8988648983999619, | |
| "learning_rate": 3.2201578679188396e-06, | |
| "loss": 0.1383, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.43799536641872944, | |
| "grad_norm": 0.809856506892555, | |
| "learning_rate": 3.2124243613902316e-06, | |
| "loss": 0.1404, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.43897085721253504, | |
| "grad_norm": 0.9050899458550025, | |
| "learning_rate": 3.204683427878291e-06, | |
| "loss": 0.1467, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4399463480063407, | |
| "grad_norm": 0.8840121082189991, | |
| "learning_rate": 3.196935148081808e-06, | |
| "loss": 0.1446, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.44092183880014635, | |
| "grad_norm": 0.855404279656821, | |
| "learning_rate": 3.189179602776157e-06, | |
| "loss": 0.1362, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.44189732959395195, | |
| "grad_norm": 0.8416485853165431, | |
| "learning_rate": 3.181416872812455e-06, | |
| "loss": 0.1381, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.4428728203877576, | |
| "grad_norm": 0.826136304893414, | |
| "learning_rate": 3.1736470391167195e-06, | |
| "loss": 0.1439, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.4438483111815632, | |
| "grad_norm": 0.888073097598988, | |
| "learning_rate": 3.1658701826890237e-06, | |
| "loss": 0.1472, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.44482380197536886, | |
| "grad_norm": 0.8935863712022185, | |
| "learning_rate": 3.158086384602652e-06, | |
| "loss": 0.1401, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.4457992927691745, | |
| "grad_norm": 0.8809290645701081, | |
| "learning_rate": 3.150295726003256e-06, | |
| "loss": 0.1527, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.4467747835629801, | |
| "grad_norm": 0.8388623216005323, | |
| "learning_rate": 3.142498288108007e-06, | |
| "loss": 0.1365, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.44775027435678577, | |
| "grad_norm": 0.9162474538739859, | |
| "learning_rate": 3.13469415220475e-06, | |
| "loss": 0.1405, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.44872576515059137, | |
| "grad_norm": 0.9225875521669346, | |
| "learning_rate": 3.1268833996511584e-06, | |
| "loss": 0.1479, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.449701255944397, | |
| "grad_norm": 0.8772214033934059, | |
| "learning_rate": 3.119066111873879e-06, | |
| "loss": 0.1312, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.4506767467382027, | |
| "grad_norm": 0.8397821561643128, | |
| "learning_rate": 3.1112423703676937e-06, | |
| "loss": 0.1344, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.4516522375320083, | |
| "grad_norm": 0.9056702372440215, | |
| "learning_rate": 3.1034122566946596e-06, | |
| "loss": 0.1329, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.45262772832581394, | |
| "grad_norm": 0.9828835323972128, | |
| "learning_rate": 3.0955758524832663e-06, | |
| "loss": 0.1454, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.45360321911961954, | |
| "grad_norm": 0.949595108992025, | |
| "learning_rate": 3.0877332394275806e-06, | |
| "loss": 0.1393, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.4545787099134252, | |
| "grad_norm": 0.8820964745475348, | |
| "learning_rate": 3.079884499286396e-06, | |
| "loss": 0.1303, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.45555420070723085, | |
| "grad_norm": 0.8539218038263852, | |
| "learning_rate": 3.0720297138823814e-06, | |
| "loss": 0.1369, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.45652969150103645, | |
| "grad_norm": 0.9065920605308372, | |
| "learning_rate": 3.0641689651012253e-06, | |
| "loss": 0.1418, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.4575051822948421, | |
| "grad_norm": 0.8602281531380704, | |
| "learning_rate": 3.056302334890786e-06, | |
| "loss": 0.1377, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.4584806730886477, | |
| "grad_norm": 0.8667507800649346, | |
| "learning_rate": 3.0484299052602355e-06, | |
| "loss": 0.1399, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.45945616388245336, | |
| "grad_norm": 0.913545132678621, | |
| "learning_rate": 3.040551758279204e-06, | |
| "loss": 0.1369, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.460431654676259, | |
| "grad_norm": 0.8738158833673688, | |
| "learning_rate": 3.032667976076923e-06, | |
| "loss": 0.1473, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.4614071454700646, | |
| "grad_norm": 0.8801680622206783, | |
| "learning_rate": 3.0247786408413725e-06, | |
| "loss": 0.1492, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.4623826362638703, | |
| "grad_norm": 0.876084094436277, | |
| "learning_rate": 3.0168838348184226e-06, | |
| "loss": 0.1429, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.4633581270576759, | |
| "grad_norm": 0.8429300809717286, | |
| "learning_rate": 3.008983640310976e-06, | |
| "loss": 0.1239, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.46433361785148153, | |
| "grad_norm": 0.8564086119743741, | |
| "learning_rate": 3.0010781396781104e-06, | |
| "loss": 0.1416, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.4653091086452872, | |
| "grad_norm": 0.9263549241409399, | |
| "learning_rate": 2.993167415334218e-06, | |
| "loss": 0.147, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.4662845994390928, | |
| "grad_norm": 0.9103066887514243, | |
| "learning_rate": 2.9852515497481494e-06, | |
| "loss": 0.149, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.46726009023289844, | |
| "grad_norm": 0.8633007227845262, | |
| "learning_rate": 2.977330625442352e-06, | |
| "loss": 0.1331, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.46823558102670404, | |
| "grad_norm": 0.9029118321491253, | |
| "learning_rate": 2.9694047249920095e-06, | |
| "loss": 0.1508, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4692110718205097, | |
| "grad_norm": 0.8011901107322851, | |
| "learning_rate": 2.961473931024182e-06, | |
| "loss": 0.1347, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.47018656261431535, | |
| "grad_norm": 0.953456056583228, | |
| "learning_rate": 2.953538326216944e-06, | |
| "loss": 0.1386, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.47116205340812095, | |
| "grad_norm": 0.9087484393028001, | |
| "learning_rate": 2.9455979932985237e-06, | |
| "loss": 0.1378, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.4721375442019266, | |
| "grad_norm": 0.8453741920558421, | |
| "learning_rate": 2.9376530150464388e-06, | |
| "loss": 0.1366, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.4731130349957322, | |
| "grad_norm": 0.9941873485158667, | |
| "learning_rate": 2.9297034742866336e-06, | |
| "loss": 0.1436, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.47408852578953786, | |
| "grad_norm": 0.9362727469002547, | |
| "learning_rate": 2.921749453892618e-06, | |
| "loss": 0.1335, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.4750640165833435, | |
| "grad_norm": 0.8486907157670049, | |
| "learning_rate": 2.913791036784601e-06, | |
| "loss": 0.1311, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.4760395073771491, | |
| "grad_norm": 0.941827373025834, | |
| "learning_rate": 2.9058283059286275e-06, | |
| "loss": 0.1384, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.4770149981709548, | |
| "grad_norm": 1.0578890093377278, | |
| "learning_rate": 2.8978613443357107e-06, | |
| "loss": 0.1532, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.4779904889647604, | |
| "grad_norm": 0.9196862748457214, | |
| "learning_rate": 2.889890235060972e-06, | |
| "loss": 0.1451, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.47896597975856603, | |
| "grad_norm": 0.9252094112063749, | |
| "learning_rate": 2.88191506120277e-06, | |
| "loss": 0.1478, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.4799414705523717, | |
| "grad_norm": 0.9936636709844183, | |
| "learning_rate": 2.873935905901839e-06, | |
| "loss": 0.1383, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.4809169613461773, | |
| "grad_norm": 0.9220512507450012, | |
| "learning_rate": 2.865952852340417e-06, | |
| "loss": 0.1414, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.48189245213998294, | |
| "grad_norm": 0.8881328759002474, | |
| "learning_rate": 2.8579659837413816e-06, | |
| "loss": 0.1381, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.48286794293378854, | |
| "grad_norm": 0.9493132086314534, | |
| "learning_rate": 2.8499753833673827e-06, | |
| "loss": 0.1356, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.4838434337275942, | |
| "grad_norm": 0.8586849465773233, | |
| "learning_rate": 2.8419811345199736e-06, | |
| "loss": 0.1288, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.48481892452139985, | |
| "grad_norm": 0.8051743928108915, | |
| "learning_rate": 2.8339833205387434e-06, | |
| "loss": 0.1272, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.48579441531520545, | |
| "grad_norm": 0.8245960012411253, | |
| "learning_rate": 2.825982024800445e-06, | |
| "loss": 0.136, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.4867699061090111, | |
| "grad_norm": 0.8849438079565617, | |
| "learning_rate": 2.8179773307181307e-06, | |
| "loss": 0.1401, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.4877453969028167, | |
| "grad_norm": 1.0134005395077472, | |
| "learning_rate": 2.8099693217402807e-06, | |
| "loss": 0.1455, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4877453969028167, | |
| "eval_loss": 0.13670527935028076, | |
| "eval_runtime": 121.3231, | |
| "eval_samples_per_second": 5.465, | |
| "eval_steps_per_second": 0.684, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.48872088769662236, | |
| "grad_norm": 0.9319226416659817, | |
| "learning_rate": 2.8019580813499304e-06, | |
| "loss": 0.1443, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.489696378490428, | |
| "grad_norm": 0.9042087056844109, | |
| "learning_rate": 2.793943693063805e-06, | |
| "loss": 0.1382, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.4906718692842336, | |
| "grad_norm": 0.8443370914686048, | |
| "learning_rate": 2.7859262404314453e-06, | |
| "loss": 0.1226, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.4916473600780393, | |
| "grad_norm": 1.0430324211655913, | |
| "learning_rate": 2.7779058070343367e-06, | |
| "loss": 0.147, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.4926228508718449, | |
| "grad_norm": 0.9189006680486591, | |
| "learning_rate": 2.7698824764850406e-06, | |
| "loss": 0.1328, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.49359834166565053, | |
| "grad_norm": 0.8384229297869797, | |
| "learning_rate": 2.7618563324263208e-06, | |
| "loss": 0.1357, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.4945738324594562, | |
| "grad_norm": 0.9260402086024966, | |
| "learning_rate": 2.7538274585302707e-06, | |
| "loss": 0.1328, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.4955493232532618, | |
| "grad_norm": 0.8698081640274972, | |
| "learning_rate": 2.745795938497443e-06, | |
| "loss": 0.1334, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.49652481404706744, | |
| "grad_norm": 0.9155791950153026, | |
| "learning_rate": 2.737761856055975e-06, | |
| "loss": 0.1355, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.49750030484087304, | |
| "grad_norm": 0.9389141674328999, | |
| "learning_rate": 2.7297252949607195e-06, | |
| "loss": 0.1338, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4984757956346787, | |
| "grad_norm": 0.9095624175043169, | |
| "learning_rate": 2.721686338992366e-06, | |
| "loss": 0.1371, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.49945128642848435, | |
| "grad_norm": 0.8431669151453126, | |
| "learning_rate": 2.7136450719565732e-06, | |
| "loss": 0.1246, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.50042677722229, | |
| "grad_norm": 0.8523982451840252, | |
| "learning_rate": 2.7056015776830907e-06, | |
| "loss": 0.1227, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5014022680160956, | |
| "grad_norm": 0.8800355441963557, | |
| "learning_rate": 2.6975559400248876e-06, | |
| "loss": 0.1376, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5023777588099012, | |
| "grad_norm": 0.9058981894611112, | |
| "learning_rate": 2.6895082428572777e-06, | |
| "loss": 0.1417, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5033532496037069, | |
| "grad_norm": 0.8780804727101097, | |
| "learning_rate": 2.681458570077045e-06, | |
| "loss": 0.1346, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5043287403975125, | |
| "grad_norm": 0.9298727825050748, | |
| "learning_rate": 2.673407005601569e-06, | |
| "loss": 0.1457, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5053042311913182, | |
| "grad_norm": 0.8489401556484758, | |
| "learning_rate": 2.6653536333679504e-06, | |
| "loss": 0.1331, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5062797219851237, | |
| "grad_norm": 0.8898533627090344, | |
| "learning_rate": 2.6572985373321344e-06, | |
| "loss": 0.1434, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5072552127789294, | |
| "grad_norm": 0.9081368712381108, | |
| "learning_rate": 2.649241801468039e-06, | |
| "loss": 0.1403, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.508230703572735, | |
| "grad_norm": 0.8906355358423613, | |
| "learning_rate": 2.641183509766675e-06, | |
| "loss": 0.1309, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5092061943665407, | |
| "grad_norm": 0.9492976151605389, | |
| "learning_rate": 2.633123746235274e-06, | |
| "loss": 0.1434, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5101816851603463, | |
| "grad_norm": 0.9210546856991715, | |
| "learning_rate": 2.6250625948964125e-06, | |
| "loss": 0.1362, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5111571759541519, | |
| "grad_norm": 0.884606453071947, | |
| "learning_rate": 2.617000139787132e-06, | |
| "loss": 0.1412, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5121326667479575, | |
| "grad_norm": 0.9149743385923425, | |
| "learning_rate": 2.608936464958068e-06, | |
| "loss": 0.1471, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5131081575417632, | |
| "grad_norm": 0.9748860734433248, | |
| "learning_rate": 2.6008716544725692e-06, | |
| "loss": 0.1419, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5140836483355689, | |
| "grad_norm": 0.8273165692055457, | |
| "learning_rate": 2.5928057924058264e-06, | |
| "loss": 0.1289, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5150591391293745, | |
| "grad_norm": 0.9005080399868409, | |
| "learning_rate": 2.5847389628439905e-06, | |
| "loss": 0.1295, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.51603462992318, | |
| "grad_norm": 0.8085104883382979, | |
| "learning_rate": 2.576671249883301e-06, | |
| "loss": 0.1341, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5170101207169857, | |
| "grad_norm": 0.9357055967861888, | |
| "learning_rate": 2.568602737629204e-06, | |
| "loss": 0.1383, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5179856115107914, | |
| "grad_norm": 0.9860801651928193, | |
| "learning_rate": 2.5605335101954796e-06, | |
| "loss": 0.1395, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.518961102304597, | |
| "grad_norm": 1.0023841873957609, | |
| "learning_rate": 2.552463651703365e-06, | |
| "loss": 0.1391, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5199365930984027, | |
| "grad_norm": 0.9970746647895117, | |
| "learning_rate": 2.5443932462806733e-06, | |
| "loss": 0.1345, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5209120838922082, | |
| "grad_norm": 0.8823424259442537, | |
| "learning_rate": 2.5363223780609214e-06, | |
| "loss": 0.1303, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5218875746860139, | |
| "grad_norm": 0.8990272295179849, | |
| "learning_rate": 2.52825113118245e-06, | |
| "loss": 0.138, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5228630654798195, | |
| "grad_norm": 0.964490190997079, | |
| "learning_rate": 2.520179589787547e-06, | |
| "loss": 0.1419, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5238385562736252, | |
| "grad_norm": 0.9850971200909243, | |
| "learning_rate": 2.5121078380215713e-06, | |
| "loss": 0.1378, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.5248140470674308, | |
| "grad_norm": 0.8473816563308804, | |
| "learning_rate": 2.5040359600320747e-06, | |
| "loss": 0.1331, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5257895378612364, | |
| "grad_norm": 0.9063261263061652, | |
| "learning_rate": 2.4959640399679253e-06, | |
| "loss": 0.143, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.526765028655042, | |
| "grad_norm": 0.8833983728636542, | |
| "learning_rate": 2.4878921619784295e-06, | |
| "loss": 0.1337, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5277405194488477, | |
| "grad_norm": 0.8657377624673289, | |
| "learning_rate": 2.4798204102124533e-06, | |
| "loss": 0.1344, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5287160102426534, | |
| "grad_norm": 0.9202779408760426, | |
| "learning_rate": 2.4717488688175513e-06, | |
| "loss": 0.1315, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.529691501036459, | |
| "grad_norm": 0.9088907887564982, | |
| "learning_rate": 2.4636776219390794e-06, | |
| "loss": 0.1329, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5306669918302646, | |
| "grad_norm": 0.9310598535059849, | |
| "learning_rate": 2.4556067537193276e-06, | |
| "loss": 0.1338, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.5316424826240702, | |
| "grad_norm": 0.9003405042179257, | |
| "learning_rate": 2.4475363482966356e-06, | |
| "loss": 0.1362, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5326179734178759, | |
| "grad_norm": 0.8206989405119627, | |
| "learning_rate": 2.4394664898045208e-06, | |
| "loss": 0.133, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5335934642116815, | |
| "grad_norm": 0.8742977014572649, | |
| "learning_rate": 2.4313972623707964e-06, | |
| "loss": 0.1343, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.5345689550054872, | |
| "grad_norm": 0.8727361447556689, | |
| "learning_rate": 2.4233287501167e-06, | |
| "loss": 0.1318, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.5355444457992927, | |
| "grad_norm": 0.9233330686910666, | |
| "learning_rate": 2.4152610371560095e-06, | |
| "loss": 0.141, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.5365199365930984, | |
| "grad_norm": 0.8918087087553142, | |
| "learning_rate": 2.4071942075941744e-06, | |
| "loss": 0.1329, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.537495427386904, | |
| "grad_norm": 0.849661983110438, | |
| "learning_rate": 2.3991283455274316e-06, | |
| "loss": 0.1288, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.5384709181807097, | |
| "grad_norm": 0.8538891952062707, | |
| "learning_rate": 2.391063535041933e-06, | |
| "loss": 0.1322, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5394464089745153, | |
| "grad_norm": 0.8801508104924974, | |
| "learning_rate": 2.3829998602128685e-06, | |
| "loss": 0.1239, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.5404218997683209, | |
| "grad_norm": 0.8524417866635733, | |
| "learning_rate": 2.3749374051035883e-06, | |
| "loss": 0.1308, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.5413973905621265, | |
| "grad_norm": 0.8827154465529061, | |
| "learning_rate": 2.3668762537647254e-06, | |
| "loss": 0.1377, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5423728813559322, | |
| "grad_norm": 0.8050527885767014, | |
| "learning_rate": 2.358816490233326e-06, | |
| "loss": 0.127, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.5433483721497379, | |
| "grad_norm": 0.8172596997607479, | |
| "learning_rate": 2.3507581985319623e-06, | |
| "loss": 0.1212, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.5443238629435435, | |
| "grad_norm": 0.8963200330399226, | |
| "learning_rate": 2.342701462667866e-06, | |
| "loss": 0.1401, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.5452993537373491, | |
| "grad_norm": 0.887052377675435, | |
| "learning_rate": 2.3346463666320512e-06, | |
| "loss": 0.1323, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.5462748445311547, | |
| "grad_norm": 0.9127806776302525, | |
| "learning_rate": 2.3265929943984317e-06, | |
| "loss": 0.1361, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5472503353249604, | |
| "grad_norm": 0.9147589526822345, | |
| "learning_rate": 2.318541429922956e-06, | |
| "loss": 0.1363, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.548225826118766, | |
| "grad_norm": 0.8944082387887866, | |
| "learning_rate": 2.310491757142723e-06, | |
| "loss": 0.1414, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.5492013169125717, | |
| "grad_norm": 0.8666818426784915, | |
| "learning_rate": 2.3024440599751132e-06, | |
| "loss": 0.1338, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.5501768077063772, | |
| "grad_norm": 0.8384112838731508, | |
| "learning_rate": 2.29439842231691e-06, | |
| "loss": 0.128, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.5511522985001829, | |
| "grad_norm": 0.8598917633955132, | |
| "learning_rate": 2.2863549280434285e-06, | |
| "loss": 0.1378, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5521277892939885, | |
| "grad_norm": 0.8490812429791822, | |
| "learning_rate": 2.2783136610076345e-06, | |
| "loss": 0.1302, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.5531032800877942, | |
| "grad_norm": 0.9671910225128955, | |
| "learning_rate": 2.270274705039282e-06, | |
| "loss": 0.1371, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.5540787708815998, | |
| "grad_norm": 0.9181284807350794, | |
| "learning_rate": 2.2622381439440255e-06, | |
| "loss": 0.1435, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.5550542616754054, | |
| "grad_norm": 0.7950987192294108, | |
| "learning_rate": 2.2542040615025584e-06, | |
| "loss": 0.1219, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.556029752469211, | |
| "grad_norm": 1.0089717587920861, | |
| "learning_rate": 2.24617254146973e-06, | |
| "loss": 0.1292, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5570052432630167, | |
| "grad_norm": 0.9858255149161776, | |
| "learning_rate": 2.23814366757368e-06, | |
| "loss": 0.1393, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.5579807340568224, | |
| "grad_norm": 0.8129514529071972, | |
| "learning_rate": 2.23011752351496e-06, | |
| "loss": 0.1235, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.558956224850628, | |
| "grad_norm": 0.9188253431247291, | |
| "learning_rate": 2.222094192965664e-06, | |
| "loss": 0.1275, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.5599317156444336, | |
| "grad_norm": 0.9256786564401078, | |
| "learning_rate": 2.214073759568555e-06, | |
| "loss": 0.1383, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.5609072064382392, | |
| "grad_norm": 0.810478233219589, | |
| "learning_rate": 2.2060563069361955e-06, | |
| "loss": 0.1221, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.5618826972320449, | |
| "grad_norm": 0.8139352487642532, | |
| "learning_rate": 2.19804191865007e-06, | |
| "loss": 0.1203, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.5628581880258505, | |
| "grad_norm": 0.8148607289634421, | |
| "learning_rate": 2.19003067825972e-06, | |
| "loss": 0.127, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.5638336788196562, | |
| "grad_norm": 0.8730605763594029, | |
| "learning_rate": 2.1820226692818693e-06, | |
| "loss": 0.1294, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.5648091696134617, | |
| "grad_norm": 0.9081540528860818, | |
| "learning_rate": 2.174017975199556e-06, | |
| "loss": 0.1418, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.5657846604072674, | |
| "grad_norm": 0.964077636816459, | |
| "learning_rate": 2.1660166794612574e-06, | |
| "loss": 0.1549, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.566760151201073, | |
| "grad_norm": 0.8682612244819073, | |
| "learning_rate": 2.158018865480027e-06, | |
| "loss": 0.1339, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.5677356419948787, | |
| "grad_norm": 0.9087722584693885, | |
| "learning_rate": 2.1500246166326177e-06, | |
| "loss": 0.138, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.5687111327886843, | |
| "grad_norm": 0.8284282518619499, | |
| "learning_rate": 2.1420340162586196e-06, | |
| "loss": 0.1273, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.5696866235824899, | |
| "grad_norm": 0.899367551533117, | |
| "learning_rate": 2.1340471476595836e-06, | |
| "loss": 0.1325, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.5706621143762955, | |
| "grad_norm": 0.8721503402061037, | |
| "learning_rate": 2.1260640940981616e-06, | |
| "loss": 0.1373, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.5716376051701012, | |
| "grad_norm": 0.8007443556277751, | |
| "learning_rate": 2.11808493879723e-06, | |
| "loss": 0.1314, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.5726130959639069, | |
| "grad_norm": 0.843399287875525, | |
| "learning_rate": 2.110109764939029e-06, | |
| "loss": 0.1273, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.5735885867577125, | |
| "grad_norm": 0.8694236616188882, | |
| "learning_rate": 2.10213865566429e-06, | |
| "loss": 0.1292, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.5745640775515181, | |
| "grad_norm": 0.944807048930237, | |
| "learning_rate": 2.0941716940713733e-06, | |
| "loss": 0.136, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.5755395683453237, | |
| "grad_norm": 0.8012154070123548, | |
| "learning_rate": 2.086208963215399e-06, | |
| "loss": 0.1219, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5765150591391294, | |
| "grad_norm": 0.9713169032385847, | |
| "learning_rate": 2.0782505461073822e-06, | |
| "loss": 0.1272, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.577490549932935, | |
| "grad_norm": 0.833986482831455, | |
| "learning_rate": 2.0702965257133664e-06, | |
| "loss": 0.1211, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.5784660407267407, | |
| "grad_norm": 0.9223111755392438, | |
| "learning_rate": 2.062346984953562e-06, | |
| "loss": 0.1229, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.5794415315205462, | |
| "grad_norm": 0.9302281584720075, | |
| "learning_rate": 2.0544020067014776e-06, | |
| "loss": 0.1383, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.5804170223143519, | |
| "grad_norm": 0.9426149459659149, | |
| "learning_rate": 2.0464616737830566e-06, | |
| "loss": 0.1337, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.5813925131081575, | |
| "grad_norm": 0.8882577504858508, | |
| "learning_rate": 2.03852606897582e-06, | |
| "loss": 0.1244, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.5823680039019632, | |
| "grad_norm": 0.8456021234816907, | |
| "learning_rate": 2.0305952750079918e-06, | |
| "loss": 0.1324, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.5833434946957688, | |
| "grad_norm": 0.933740394118853, | |
| "learning_rate": 2.0226693745576494e-06, | |
| "loss": 0.136, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.5843189854895744, | |
| "grad_norm": 0.9462976911157949, | |
| "learning_rate": 2.0147484502518514e-06, | |
| "loss": 0.1286, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.58529447628338, | |
| "grad_norm": 0.9098208372409959, | |
| "learning_rate": 2.006832584665783e-06, | |
| "loss": 0.132, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5862699670771857, | |
| "grad_norm": 0.795331526345968, | |
| "learning_rate": 1.99892186032189e-06, | |
| "loss": 0.1151, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.5872454578709914, | |
| "grad_norm": 0.8664446011660321, | |
| "learning_rate": 1.9910163596890247e-06, | |
| "loss": 0.1288, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.588220948664797, | |
| "grad_norm": 0.8610758137851285, | |
| "learning_rate": 1.983116165181578e-06, | |
| "loss": 0.1239, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.5891964394586026, | |
| "grad_norm": 0.8560179576447229, | |
| "learning_rate": 1.9752213591586288e-06, | |
| "loss": 0.1276, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.5901719302524082, | |
| "grad_norm": 0.8603930368509891, | |
| "learning_rate": 1.9673320239230783e-06, | |
| "loss": 0.1305, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.5911474210462139, | |
| "grad_norm": 0.8443701642736344, | |
| "learning_rate": 1.9594482417207973e-06, | |
| "loss": 0.1261, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.5921229118400195, | |
| "grad_norm": 0.7909939129569269, | |
| "learning_rate": 1.951570094739765e-06, | |
| "loss": 0.1211, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.5930984026338252, | |
| "grad_norm": 0.931299792718923, | |
| "learning_rate": 1.9436976651092143e-06, | |
| "loss": 0.1382, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.5940738934276307, | |
| "grad_norm": 0.8781253875372456, | |
| "learning_rate": 1.9358310348987755e-06, | |
| "loss": 0.1273, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.5950493842214364, | |
| "grad_norm": 0.8570421027558416, | |
| "learning_rate": 1.9279702861176203e-06, | |
| "loss": 0.1342, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.596024875015242, | |
| "grad_norm": 0.8451871775967923, | |
| "learning_rate": 1.9201155007136045e-06, | |
| "loss": 0.1365, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.5970003658090477, | |
| "grad_norm": 0.9384520823084104, | |
| "learning_rate": 1.9122667605724202e-06, | |
| "loss": 0.1348, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.5979758566028534, | |
| "grad_norm": 0.8922350956011054, | |
| "learning_rate": 1.9044241475167339e-06, | |
| "loss": 0.1348, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.5989513473966589, | |
| "grad_norm": 0.8385907676608247, | |
| "learning_rate": 1.8965877433053409e-06, | |
| "loss": 0.1287, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.5999268381904646, | |
| "grad_norm": 0.8438875643530197, | |
| "learning_rate": 1.8887576296323069e-06, | |
| "loss": 0.1234, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6009023289842702, | |
| "grad_norm": 0.8782492476271099, | |
| "learning_rate": 1.8809338881261213e-06, | |
| "loss": 0.1312, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6018778197780759, | |
| "grad_norm": 0.8210821361205313, | |
| "learning_rate": 1.8731166003488427e-06, | |
| "loss": 0.1256, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.6028533105718815, | |
| "grad_norm": 0.8313931745166832, | |
| "learning_rate": 1.86530584779525e-06, | |
| "loss": 0.1269, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6038288013656871, | |
| "grad_norm": 0.8248814372114766, | |
| "learning_rate": 1.857501711891993e-06, | |
| "loss": 0.1277, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6048042921594927, | |
| "grad_norm": 0.8300683641876326, | |
| "learning_rate": 1.8497042739967447e-06, | |
| "loss": 0.1321, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6057797829532984, | |
| "grad_norm": 0.8967700470622615, | |
| "learning_rate": 1.841913615397348e-06, | |
| "loss": 0.1273, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.606755273747104, | |
| "grad_norm": 0.937831066737166, | |
| "learning_rate": 1.834129817310977e-06, | |
| "loss": 0.1293, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6077307645409097, | |
| "grad_norm": 0.8608866113112006, | |
| "learning_rate": 1.826352960883281e-06, | |
| "loss": 0.1276, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.6087062553347152, | |
| "grad_norm": 0.8562897857077051, | |
| "learning_rate": 1.8185831271875457e-06, | |
| "loss": 0.1204, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6096817461285209, | |
| "grad_norm": 0.8326388599515545, | |
| "learning_rate": 1.8108203972238436e-06, | |
| "loss": 0.124, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6106572369223265, | |
| "grad_norm": 0.9146045731524001, | |
| "learning_rate": 1.8030648519181926e-06, | |
| "loss": 0.1327, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6116327277161322, | |
| "grad_norm": 0.8125344568360147, | |
| "learning_rate": 1.7953165721217086e-06, | |
| "loss": 0.1206, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6126082185099379, | |
| "grad_norm": 0.8788372999011178, | |
| "learning_rate": 1.7875756386097686e-06, | |
| "loss": 0.1376, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6135837093037434, | |
| "grad_norm": 0.8647255719378499, | |
| "learning_rate": 1.779842132081162e-06, | |
| "loss": 0.1243, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.614559200097549, | |
| "grad_norm": 0.8756476987196234, | |
| "learning_rate": 1.7721161331572522e-06, | |
| "loss": 0.132, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6155346908913547, | |
| "grad_norm": 0.9563185708098122, | |
| "learning_rate": 1.7643977223811392e-06, | |
| "loss": 0.1364, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6165101816851604, | |
| "grad_norm": 0.8320678225276503, | |
| "learning_rate": 1.7566869802168132e-06, | |
| "loss": 0.1189, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.617485672478966, | |
| "grad_norm": 0.9093829860333068, | |
| "learning_rate": 1.7489839870483236e-06, | |
| "loss": 0.1277, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.6184611632727716, | |
| "grad_norm": 0.9011561073082258, | |
| "learning_rate": 1.7412888231789327e-06, | |
| "loss": 0.1358, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.6194366540665772, | |
| "grad_norm": 1.0084984996858963, | |
| "learning_rate": 1.7336015688302869e-06, | |
| "loss": 0.1268, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6204121448603829, | |
| "grad_norm": 0.9152283340289553, | |
| "learning_rate": 1.725922304141573e-06, | |
| "loss": 0.1295, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6213876356541885, | |
| "grad_norm": 0.9238351863571306, | |
| "learning_rate": 1.718251109168688e-06, | |
| "loss": 0.1318, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.6223631264479942, | |
| "grad_norm": 0.866304353620812, | |
| "learning_rate": 1.7105880638834007e-06, | |
| "loss": 0.1212, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.6233386172417997, | |
| "grad_norm": 0.95501299054228, | |
| "learning_rate": 1.7029332481725234e-06, | |
| "loss": 0.1364, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.6243141080356054, | |
| "grad_norm": 0.8839661191980624, | |
| "learning_rate": 1.6952867418370707e-06, | |
| "loss": 0.127, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.625289598829411, | |
| "grad_norm": 0.8559388882214561, | |
| "learning_rate": 1.6876486245914375e-06, | |
| "loss": 0.1249, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.6262650896232167, | |
| "grad_norm": 0.9364435799205211, | |
| "learning_rate": 1.6800189760625585e-06, | |
| "loss": 0.1329, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.6272405804170224, | |
| "grad_norm": 0.8651014555168739, | |
| "learning_rate": 1.672397875789086e-06, | |
| "loss": 0.125, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.6282160712108279, | |
| "grad_norm": 0.8892108367186486, | |
| "learning_rate": 1.6647854032205547e-06, | |
| "loss": 0.1304, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.6291915620046336, | |
| "grad_norm": 0.8503679132174746, | |
| "learning_rate": 1.6571816377165568e-06, | |
| "loss": 0.1249, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6301670527984392, | |
| "grad_norm": 0.9314057258730163, | |
| "learning_rate": 1.6495866585459142e-06, | |
| "loss": 0.131, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.6311425435922449, | |
| "grad_norm": 0.9002310278187038, | |
| "learning_rate": 1.6420005448858522e-06, | |
| "loss": 0.1329, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.6321180343860505, | |
| "grad_norm": 0.8263610327236112, | |
| "learning_rate": 1.6344233758211717e-06, | |
| "loss": 0.1217, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.6330935251798561, | |
| "grad_norm": 0.8099996895450952, | |
| "learning_rate": 1.6268552303434298e-06, | |
| "loss": 0.1211, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.6340690159736617, | |
| "grad_norm": 0.8528126491211017, | |
| "learning_rate": 1.6192961873501096e-06, | |
| "loss": 0.1297, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6350445067674674, | |
| "grad_norm": 0.8585959373381494, | |
| "learning_rate": 1.611746325643805e-06, | |
| "loss": 0.1295, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.636019997561273, | |
| "grad_norm": 0.8655236759185483, | |
| "learning_rate": 1.6042057239313919e-06, | |
| "loss": 0.1246, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.6369954883550787, | |
| "grad_norm": 0.8207113182539624, | |
| "learning_rate": 1.5966744608232137e-06, | |
| "loss": 0.12, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.6379709791488842, | |
| "grad_norm": 0.8614663092183132, | |
| "learning_rate": 1.5891526148322594e-06, | |
| "loss": 0.1187, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.6389464699426899, | |
| "grad_norm": 0.9001233935199348, | |
| "learning_rate": 1.5816402643733441e-06, | |
| "loss": 0.1276, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6399219607364955, | |
| "grad_norm": 0.8508200436128184, | |
| "learning_rate": 1.5741374877622922e-06, | |
| "loss": 0.1267, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.6408974515303012, | |
| "grad_norm": 0.9227739705381223, | |
| "learning_rate": 1.566644363215123e-06, | |
| "loss": 0.1344, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.6418729423241069, | |
| "grad_norm": 0.853646215929862, | |
| "learning_rate": 1.5591609688472313e-06, | |
| "loss": 0.1363, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.6428484331179124, | |
| "grad_norm": 0.9878682768114257, | |
| "learning_rate": 1.551687382672578e-06, | |
| "loss": 0.1354, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.643823923911718, | |
| "grad_norm": 0.8622608892200061, | |
| "learning_rate": 1.5442236826028705e-06, | |
| "loss": 0.1281, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6447994147055237, | |
| "grad_norm": 0.8868098887146023, | |
| "learning_rate": 1.5367699464467596e-06, | |
| "loss": 0.1316, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.6457749054993294, | |
| "grad_norm": 0.8568792305689042, | |
| "learning_rate": 1.5293262519090169e-06, | |
| "loss": 0.1289, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.646750396293135, | |
| "grad_norm": 0.8345103045814822, | |
| "learning_rate": 1.5218926765897345e-06, | |
| "loss": 0.1238, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.6477258870869406, | |
| "grad_norm": 0.8296168710839235, | |
| "learning_rate": 1.5144692979835103e-06, | |
| "loss": 0.1279, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.6487013778807462, | |
| "grad_norm": 0.8651995871451914, | |
| "learning_rate": 1.5070561934786437e-06, | |
| "loss": 0.131, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.6496768686745519, | |
| "grad_norm": 0.9169910575714909, | |
| "learning_rate": 1.4996534403563267e-06, | |
| "loss": 0.1334, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.6506523594683575, | |
| "grad_norm": 0.7693306954167114, | |
| "learning_rate": 1.4922611157898364e-06, | |
| "loss": 0.1168, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.6516278502621632, | |
| "grad_norm": 0.863479055519848, | |
| "learning_rate": 1.4848792968437376e-06, | |
| "loss": 0.1248, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.6526033410559687, | |
| "grad_norm": 0.8668901068891359, | |
| "learning_rate": 1.477508060473069e-06, | |
| "loss": 0.1321, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.6535788318497744, | |
| "grad_norm": 0.8378480519694856, | |
| "learning_rate": 1.4701474835225515e-06, | |
| "loss": 0.1312, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.65455432264358, | |
| "grad_norm": 0.811001196208897, | |
| "learning_rate": 1.4627976427257784e-06, | |
| "loss": 0.1201, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.6555298134373857, | |
| "grad_norm": 0.8636188619772824, | |
| "learning_rate": 1.4554586147044225e-06, | |
| "loss": 0.1365, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.6565053042311914, | |
| "grad_norm": 0.85744996390658, | |
| "learning_rate": 1.4481304759674303e-06, | |
| "loss": 0.1249, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.6574807950249969, | |
| "grad_norm": 0.8435636506247961, | |
| "learning_rate": 1.4408133029102322e-06, | |
| "loss": 0.1195, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.6584562858188026, | |
| "grad_norm": 0.8509255085231769, | |
| "learning_rate": 1.4335071718139379e-06, | |
| "loss": 0.1183, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6594317766126082, | |
| "grad_norm": 0.8793960273956329, | |
| "learning_rate": 1.4262121588445493e-06, | |
| "loss": 0.1245, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.6604072674064139, | |
| "grad_norm": 0.8970156523355322, | |
| "learning_rate": 1.418928340052159e-06, | |
| "loss": 0.1286, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.6613827582002195, | |
| "grad_norm": 0.8644162321708901, | |
| "learning_rate": 1.411655791370164e-06, | |
| "loss": 0.1252, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.6623582489940251, | |
| "grad_norm": 0.8396437394115862, | |
| "learning_rate": 1.4043945886144673e-06, | |
| "loss": 0.1179, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.6633337397878307, | |
| "grad_norm": 0.8448586465328236, | |
| "learning_rate": 1.397144807482696e-06, | |
| "loss": 0.1252, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6643092305816364, | |
| "grad_norm": 0.8718372351836149, | |
| "learning_rate": 1.3899065235534031e-06, | |
| "loss": 0.1216, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.665284721375442, | |
| "grad_norm": 0.8407139122291305, | |
| "learning_rate": 1.382679812285287e-06, | |
| "loss": 0.1283, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.6662602121692477, | |
| "grad_norm": 0.8404593100461625, | |
| "learning_rate": 1.375464749016399e-06, | |
| "loss": 0.1188, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.6672357029630532, | |
| "grad_norm": 0.940858079210062, | |
| "learning_rate": 1.3682614089633637e-06, | |
| "loss": 0.129, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.6682111937568589, | |
| "grad_norm": 0.8569084229583452, | |
| "learning_rate": 1.3610698672205897e-06, | |
| "loss": 0.1281, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.6691866845506645, | |
| "grad_norm": 0.8797960805331662, | |
| "learning_rate": 1.3538901987594882e-06, | |
| "loss": 0.1292, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.6701621753444702, | |
| "grad_norm": 0.8374942493553367, | |
| "learning_rate": 1.3467224784276961e-06, | |
| "loss": 0.1309, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.6711376661382759, | |
| "grad_norm": 0.7710018857731183, | |
| "learning_rate": 1.339566780948287e-06, | |
| "loss": 0.1139, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.6721131569320814, | |
| "grad_norm": 0.8657391409702617, | |
| "learning_rate": 1.3324231809189985e-06, | |
| "loss": 0.1309, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.6730886477258871, | |
| "grad_norm": 0.8127948325482396, | |
| "learning_rate": 1.3252917528114556e-06, | |
| "loss": 0.1267, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6740641385196927, | |
| "grad_norm": 0.8122952852781401, | |
| "learning_rate": 1.3181725709703874e-06, | |
| "loss": 0.1213, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.6750396293134984, | |
| "grad_norm": 0.851528828559218, | |
| "learning_rate": 1.3110657096128606e-06, | |
| "loss": 0.12, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.676015120107304, | |
| "grad_norm": 0.86862592870714, | |
| "learning_rate": 1.3039712428274976e-06, | |
| "loss": 0.1231, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.6769906109011096, | |
| "grad_norm": 0.8696639730293406, | |
| "learning_rate": 1.2968892445737117e-06, | |
| "loss": 0.1294, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 0.9002780776036385, | |
| "learning_rate": 1.2898197886809289e-06, | |
| "loss": 0.1256, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.6789415924887209, | |
| "grad_norm": 0.8530790957831813, | |
| "learning_rate": 1.2827629488478254e-06, | |
| "loss": 0.13, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.6799170832825265, | |
| "grad_norm": 0.8450717054989292, | |
| "learning_rate": 1.2757187986415528e-06, | |
| "loss": 0.1306, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.6808925740763322, | |
| "grad_norm": 0.8714024729066125, | |
| "learning_rate": 1.2686874114969767e-06, | |
| "loss": 0.1323, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.6818680648701377, | |
| "grad_norm": 0.8591419822215062, | |
| "learning_rate": 1.2616688607159045e-06, | |
| "loss": 0.1396, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.6828435556639434, | |
| "grad_norm": 0.8361114781431095, | |
| "learning_rate": 1.2546632194663305e-06, | |
| "loss": 0.1254, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.683819046457749, | |
| "grad_norm": 0.8242742327238427, | |
| "learning_rate": 1.2476705607816639e-06, | |
| "loss": 0.1234, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.6847945372515547, | |
| "grad_norm": 0.8637116393284514, | |
| "learning_rate": 1.2406909575599717e-06, | |
| "loss": 0.125, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.6857700280453604, | |
| "grad_norm": 0.8884418622966431, | |
| "learning_rate": 1.2337244825632217e-06, | |
| "loss": 0.135, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.6867455188391659, | |
| "grad_norm": 0.8275460859037888, | |
| "learning_rate": 1.2267712084165168e-06, | |
| "loss": 0.1158, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.6877210096329716, | |
| "grad_norm": 0.8553642751195779, | |
| "learning_rate": 1.2198312076073427e-06, | |
| "loss": 0.128, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.6886965004267772, | |
| "grad_norm": 0.8837324796776882, | |
| "learning_rate": 1.2129045524848138e-06, | |
| "loss": 0.128, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.6896719912205829, | |
| "grad_norm": 0.8270144975513911, | |
| "learning_rate": 1.205991315258914e-06, | |
| "loss": 0.1185, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.6906474820143885, | |
| "grad_norm": 0.8738272053373212, | |
| "learning_rate": 1.199091567999746e-06, | |
| "loss": 0.1264, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.6916229728081941, | |
| "grad_norm": 0.8344314699090399, | |
| "learning_rate": 1.1922053826367833e-06, | |
| "loss": 0.1152, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.6925984636019997, | |
| "grad_norm": 0.8650165946821683, | |
| "learning_rate": 1.1853328309581139e-06, | |
| "loss": 0.1283, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6935739543958054, | |
| "grad_norm": 0.8374700294304306, | |
| "learning_rate": 1.1784739846096982e-06, | |
| "loss": 0.1238, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.694549445189611, | |
| "grad_norm": 0.8380502969131793, | |
| "learning_rate": 1.1716289150946173e-06, | |
| "loss": 0.1234, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.6955249359834167, | |
| "grad_norm": 0.9064794381405805, | |
| "learning_rate": 1.1647976937723315e-06, | |
| "loss": 0.1306, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.6965004267772222, | |
| "grad_norm": 0.832398941447385, | |
| "learning_rate": 1.1579803918579322e-06, | |
| "loss": 0.125, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.6974759175710279, | |
| "grad_norm": 0.8174727802602623, | |
| "learning_rate": 1.1511770804214046e-06, | |
| "loss": 0.1248, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.6984514083648335, | |
| "grad_norm": 0.8474018969337508, | |
| "learning_rate": 1.1443878303868805e-06, | |
| "loss": 0.1338, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.6994268991586392, | |
| "grad_norm": 0.8466582401794491, | |
| "learning_rate": 1.1376127125319065e-06, | |
| "loss": 0.1148, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7004023899524449, | |
| "grad_norm": 0.8912296637806607, | |
| "learning_rate": 1.1308517974866973e-06, | |
| "loss": 0.1274, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7013778807462504, | |
| "grad_norm": 0.9491089244563283, | |
| "learning_rate": 1.1241051557334087e-06, | |
| "loss": 0.124, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.7023533715400561, | |
| "grad_norm": 0.9380181290695212, | |
| "learning_rate": 1.117372857605395e-06, | |
| "loss": 0.1354, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7033288623338617, | |
| "grad_norm": 0.8177994093605291, | |
| "learning_rate": 1.110654973286481e-06, | |
| "loss": 0.1164, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.7043043531276674, | |
| "grad_norm": 0.8347927318132022, | |
| "learning_rate": 1.1039515728102273e-06, | |
| "loss": 0.1275, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.705279843921473, | |
| "grad_norm": 0.8970474242898054, | |
| "learning_rate": 1.0972627260592014e-06, | |
| "loss": 0.1231, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.7062553347152786, | |
| "grad_norm": 0.795847982137367, | |
| "learning_rate": 1.0905885027642484e-06, | |
| "loss": 0.1177, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.7072308255090842, | |
| "grad_norm": 0.8814347768470174, | |
| "learning_rate": 1.0839289725037669e-06, | |
| "loss": 0.1334, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7082063163028899, | |
| "grad_norm": 0.828253666509173, | |
| "learning_rate": 1.0772842047029786e-06, | |
| "loss": 0.1218, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.7091818070966955, | |
| "grad_norm": 0.812866815136845, | |
| "learning_rate": 1.0706542686332102e-06, | |
| "loss": 0.1143, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.7101572978905012, | |
| "grad_norm": 0.8556204829905426, | |
| "learning_rate": 1.064039233411166e-06, | |
| "loss": 0.1264, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7111327886843067, | |
| "grad_norm": 0.8841697516339617, | |
| "learning_rate": 1.057439167998212e-06, | |
| "loss": 0.1278, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.7121082794781124, | |
| "grad_norm": 0.8636362569900907, | |
| "learning_rate": 1.050854141199652e-06, | |
| "loss": 0.1235, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.713083770271918, | |
| "grad_norm": 0.8283746245515586, | |
| "learning_rate": 1.0442842216640168e-06, | |
| "loss": 0.1208, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.7140592610657237, | |
| "grad_norm": 0.8728491657133771, | |
| "learning_rate": 1.037729477882341e-06, | |
| "loss": 0.117, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7150347518595294, | |
| "grad_norm": 0.8915555008915115, | |
| "learning_rate": 1.0311899781874563e-06, | |
| "loss": 0.1293, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.7160102426533349, | |
| "grad_norm": 0.8380362900075755, | |
| "learning_rate": 1.0246657907532727e-06, | |
| "loss": 0.1216, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.7169857334471406, | |
| "grad_norm": 0.8565497720933896, | |
| "learning_rate": 1.018156983594073e-06, | |
| "loss": 0.1149, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7179612242409462, | |
| "grad_norm": 0.8358087384760822, | |
| "learning_rate": 1.0116636245637997e-06, | |
| "loss": 0.115, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.7189367150347519, | |
| "grad_norm": 0.8847500849421209, | |
| "learning_rate": 1.0051857813553516e-06, | |
| "loss": 0.123, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.7199122058285575, | |
| "grad_norm": 0.8794173909505707, | |
| "learning_rate": 9.987235214998741e-07, | |
| "loss": 0.1277, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.7208876966223631, | |
| "grad_norm": 0.8665043928636729, | |
| "learning_rate": 9.922769123660564e-07, | |
| "loss": 0.1197, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.7218631874161687, | |
| "grad_norm": 0.8361820544782747, | |
| "learning_rate": 9.858460211594331e-07, | |
| "loss": 0.124, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7228386782099744, | |
| "grad_norm": 0.846259877698448, | |
| "learning_rate": 9.79430914921677e-07, | |
| "loss": 0.1207, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.72381416900378, | |
| "grad_norm": 0.8468390031221599, | |
| "learning_rate": 9.730316605299041e-07, | |
| "loss": 0.1226, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.7247896597975857, | |
| "grad_norm": 0.9014801026605848, | |
| "learning_rate": 9.666483246959785e-07, | |
| "loss": 0.119, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.7257651505913912, | |
| "grad_norm": 0.9115482145040746, | |
| "learning_rate": 9.602809739658105e-07, | |
| "loss": 0.1245, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.7267406413851969, | |
| "grad_norm": 0.8643571047269124, | |
| "learning_rate": 9.53929674718668e-07, | |
| "loss": 0.1216, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7277161321790026, | |
| "grad_norm": 0.8382233183752549, | |
| "learning_rate": 9.475944931664851e-07, | |
| "loss": 0.125, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.7286916229728082, | |
| "grad_norm": 0.8536460797398011, | |
| "learning_rate": 9.412754953531664e-07, | |
| "loss": 0.1223, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.7296671137666139, | |
| "grad_norm": 0.8756760541580888, | |
| "learning_rate": 9.349727471539052e-07, | |
| "loss": 0.1215, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.7306426045604194, | |
| "grad_norm": 0.884349162839785, | |
| "learning_rate": 9.286863142744907e-07, | |
| "loss": 0.1237, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.7316180953542251, | |
| "grad_norm": 0.8232259080657391, | |
| "learning_rate": 9.224162622506283e-07, | |
| "loss": 0.118, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7325935861480307, | |
| "grad_norm": 0.8865909364907526, | |
| "learning_rate": 9.161626564472511e-07, | |
| "loss": 0.1271, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.7335690769418364, | |
| "grad_norm": 0.8299409590937745, | |
| "learning_rate": 9.099255620578451e-07, | |
| "loss": 0.1212, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.734544567735642, | |
| "grad_norm": 0.841743660255562, | |
| "learning_rate": 9.037050441037614e-07, | |
| "loss": 0.1209, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.7355200585294476, | |
| "grad_norm": 0.9030416307912014, | |
| "learning_rate": 8.975011674335468e-07, | |
| "loss": 0.1271, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.7364955493232532, | |
| "grad_norm": 0.8090547787848019, | |
| "learning_rate": 8.9131399672226e-07, | |
| "loss": 0.1174, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.7374710401170589, | |
| "grad_norm": 0.8525347836022709, | |
| "learning_rate": 8.851435964708044e-07, | |
| "loss": 0.1138, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.7384465309108645, | |
| "grad_norm": 0.8419910445268589, | |
| "learning_rate": 8.789900310052491e-07, | |
| "loss": 0.1215, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.7394220217046702, | |
| "grad_norm": 0.8490121207415494, | |
| "learning_rate": 8.728533644761647e-07, | |
| "loss": 0.1235, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.7403975124984757, | |
| "grad_norm": 0.8982804136005714, | |
| "learning_rate": 8.667336608579488e-07, | |
| "loss": 0.1226, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.7413730032922814, | |
| "grad_norm": 0.8467362174197229, | |
| "learning_rate": 8.606309839481628e-07, | |
| "loss": 0.123, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.742348494086087, | |
| "grad_norm": 0.8380377962583168, | |
| "learning_rate": 8.545453973668643e-07, | |
| "loss": 0.1182, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.7433239848798927, | |
| "grad_norm": 0.8630957150176094, | |
| "learning_rate": 8.484769645559482e-07, | |
| "loss": 0.1195, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.7442994756736984, | |
| "grad_norm": 0.8473968774046894, | |
| "learning_rate": 8.424257487784787e-07, | |
| "loss": 0.1221, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.7452749664675039, | |
| "grad_norm": 0.9113778016570447, | |
| "learning_rate": 8.363918131180371e-07, | |
| "loss": 0.1256, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.7462504572613096, | |
| "grad_norm": 0.9276069699799473, | |
| "learning_rate": 8.303752204780563e-07, | |
| "loss": 0.1248, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7472259480551152, | |
| "grad_norm": 0.8167762715238328, | |
| "learning_rate": 8.243760335811734e-07, | |
| "loss": 0.1246, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.7482014388489209, | |
| "grad_norm": 0.8357245883738734, | |
| "learning_rate": 8.183943149685678e-07, | |
| "loss": 0.1173, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.7491769296427265, | |
| "grad_norm": 0.8026503286373335, | |
| "learning_rate": 8.124301269993168e-07, | |
| "loss": 0.1151, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.7501524204365322, | |
| "grad_norm": 0.9315071378112674, | |
| "learning_rate": 8.06483531849738e-07, | |
| "loss": 0.1235, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.7511279112303377, | |
| "grad_norm": 0.9034958615476297, | |
| "learning_rate": 8.00554591512748e-07, | |
| "loss": 0.1262, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7521034020241434, | |
| "grad_norm": 0.8669626589106916, | |
| "learning_rate": 7.946433677972104e-07, | |
| "loss": 0.1243, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.753078892817949, | |
| "grad_norm": 0.8864921293794893, | |
| "learning_rate": 7.887499223272968e-07, | |
| "loss": 0.1298, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.7540543836117547, | |
| "grad_norm": 0.8392771141328131, | |
| "learning_rate": 7.828743165418393e-07, | |
| "loss": 0.111, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.7550298744055604, | |
| "grad_norm": 0.821355233876034, | |
| "learning_rate": 7.770166116936923e-07, | |
| "loss": 0.1139, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.7560053651993659, | |
| "grad_norm": 0.8688766276433155, | |
| "learning_rate": 7.711768688490962e-07, | |
| "loss": 0.1219, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.7569808559931716, | |
| "grad_norm": 0.9962941039117293, | |
| "learning_rate": 7.653551488870356e-07, | |
| "loss": 0.1283, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.7579563467869772, | |
| "grad_norm": 0.8627221635187925, | |
| "learning_rate": 7.595515124986106e-07, | |
| "loss": 0.1237, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.7589318375807829, | |
| "grad_norm": 0.8592174071340959, | |
| "learning_rate": 7.537660201863989e-07, | |
| "loss": 0.1236, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.7599073283745885, | |
| "grad_norm": 0.8914444059496847, | |
| "learning_rate": 7.479987322638274e-07, | |
| "loss": 0.126, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.7608828191683941, | |
| "grad_norm": 0.838972249342774, | |
| "learning_rate": 7.422497088545436e-07, | |
| "loss": 0.1225, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7618583099621997, | |
| "grad_norm": 0.8350154394486518, | |
| "learning_rate": 7.365190098917896e-07, | |
| "loss": 0.1172, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.7628338007560054, | |
| "grad_norm": 0.8527764567297885, | |
| "learning_rate": 7.308066951177742e-07, | |
| "loss": 0.1194, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.763809291549811, | |
| "grad_norm": 0.7880927395631528, | |
| "learning_rate": 7.251128240830543e-07, | |
| "loss": 0.1122, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.7647847823436167, | |
| "grad_norm": 0.8570625979073652, | |
| "learning_rate": 7.194374561459094e-07, | |
| "loss": 0.1275, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.7657602731374222, | |
| "grad_norm": 0.9193780613269857, | |
| "learning_rate": 7.137806504717276e-07, | |
| "loss": 0.136, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.7667357639312279, | |
| "grad_norm": 0.8788105843538093, | |
| "learning_rate": 7.081424660323846e-07, | |
| "loss": 0.1213, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.7677112547250335, | |
| "grad_norm": 0.8629850615115177, | |
| "learning_rate": 7.025229616056326e-07, | |
| "loss": 0.1223, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.7686867455188392, | |
| "grad_norm": 0.8509382827108314, | |
| "learning_rate": 6.969221957744832e-07, | |
| "loss": 0.1325, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.7696622363126449, | |
| "grad_norm": 0.8104302202106697, | |
| "learning_rate": 6.913402269266026e-07, | |
| "loss": 0.1193, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.7706377271064504, | |
| "grad_norm": 0.7876537221330931, | |
| "learning_rate": 6.857771132536958e-07, | |
| "loss": 0.1177, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.771613217900256, | |
| "grad_norm": 0.8452359406579485, | |
| "learning_rate": 6.802329127509072e-07, | |
| "loss": 0.1236, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.7725887086940617, | |
| "grad_norm": 0.9041467869529476, | |
| "learning_rate": 6.747076832162094e-07, | |
| "loss": 0.1331, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.7735641994878674, | |
| "grad_norm": 0.8750663033303995, | |
| "learning_rate": 6.692014822498066e-07, | |
| "loss": 0.1179, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.774539690281673, | |
| "grad_norm": 0.7709557191713534, | |
| "learning_rate": 6.637143672535282e-07, | |
| "loss": 0.1123, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.7755151810754786, | |
| "grad_norm": 0.8489536750773737, | |
| "learning_rate": 6.582463954302368e-07, | |
| "loss": 0.1189, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.7764906718692842, | |
| "grad_norm": 0.7986353115358458, | |
| "learning_rate": 6.527976237832256e-07, | |
| "loss": 0.1145, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.7774661626630899, | |
| "grad_norm": 0.8636955041864243, | |
| "learning_rate": 6.473681091156289e-07, | |
| "loss": 0.1235, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.7784416534568955, | |
| "grad_norm": 0.8485394380202599, | |
| "learning_rate": 6.419579080298263e-07, | |
| "loss": 0.1218, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.7794171442507012, | |
| "grad_norm": 0.8626133292687657, | |
| "learning_rate": 6.36567076926857e-07, | |
| "loss": 0.1227, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.7803926350445067, | |
| "grad_norm": 0.8270229140897313, | |
| "learning_rate": 6.311956720058263e-07, | |
| "loss": 0.1278, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7813681258383124, | |
| "grad_norm": 0.8821105926200152, | |
| "learning_rate": 6.258437492633254e-07, | |
| "loss": 0.1251, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.782343616632118, | |
| "grad_norm": 0.7683315982295926, | |
| "learning_rate": 6.205113644928429e-07, | |
| "loss": 0.1139, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.7833191074259237, | |
| "grad_norm": 0.8270618740008568, | |
| "learning_rate": 6.151985732841867e-07, | |
| "loss": 0.1117, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.7842945982197294, | |
| "grad_norm": 0.8155271658159645, | |
| "learning_rate": 6.099054310229008e-07, | |
| "loss": 0.1308, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.7852700890135349, | |
| "grad_norm": 0.7877985298756666, | |
| "learning_rate": 6.046319928896926e-07, | |
| "loss": 0.1157, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.7862455798073406, | |
| "grad_norm": 0.8527729286111828, | |
| "learning_rate": 5.993783138598532e-07, | |
| "loss": 0.1254, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.7872210706011462, | |
| "grad_norm": 0.8162522251396951, | |
| "learning_rate": 5.941444487026877e-07, | |
| "loss": 0.1193, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.7881965613949519, | |
| "grad_norm": 0.8198785573921169, | |
| "learning_rate": 5.889304519809402e-07, | |
| "loss": 0.1225, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.7891720521887575, | |
| "grad_norm": 0.8649744547672364, | |
| "learning_rate": 5.837363780502309e-07, | |
| "loss": 0.1301, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.7901475429825631, | |
| "grad_norm": 0.8253170535592403, | |
| "learning_rate": 5.785622810584834e-07, | |
| "loss": 0.1173, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7911230337763687, | |
| "grad_norm": 0.7949976111986673, | |
| "learning_rate": 5.734082149453634e-07, | |
| "loss": 0.1134, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.7920985245701744, | |
| "grad_norm": 0.8803222417671256, | |
| "learning_rate": 5.682742334417171e-07, | |
| "loss": 0.125, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.79307401536398, | |
| "grad_norm": 0.8262641815619376, | |
| "learning_rate": 5.631603900690077e-07, | |
| "loss": 0.1214, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.7940495061577857, | |
| "grad_norm": 0.849372433500513, | |
| "learning_rate": 5.580667381387623e-07, | |
| "loss": 0.1244, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.7950249969515912, | |
| "grad_norm": 0.7908963105671865, | |
| "learning_rate": 5.529933307520102e-07, | |
| "loss": 0.1087, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.7960004877453969, | |
| "grad_norm": 0.8908242498302418, | |
| "learning_rate": 5.47940220798735e-07, | |
| "loss": 0.1171, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.7969759785392025, | |
| "grad_norm": 0.8393797963498955, | |
| "learning_rate": 5.429074609573184e-07, | |
| "loss": 0.1226, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.7979514693330082, | |
| "grad_norm": 0.8535887142626548, | |
| "learning_rate": 5.378951036939966e-07, | |
| "loss": 0.1238, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.7989269601268139, | |
| "grad_norm": 0.8500240894184807, | |
| "learning_rate": 5.329032012623064e-07, | |
| "loss": 0.1244, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.7999024509206194, | |
| "grad_norm": 0.8632462798593744, | |
| "learning_rate": 5.279318057025476e-07, | |
| "loss": 0.1131, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8008779417144251, | |
| "grad_norm": 0.8502544027437935, | |
| "learning_rate": 5.229809688412337e-07, | |
| "loss": 0.125, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.8018534325082307, | |
| "grad_norm": 0.8275247834448751, | |
| "learning_rate": 5.180507422905585e-07, | |
| "loss": 0.1182, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.8028289233020364, | |
| "grad_norm": 0.8976360407929419, | |
| "learning_rate": 5.131411774478503e-07, | |
| "loss": 0.1308, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.803804414095842, | |
| "grad_norm": 0.8925880179518361, | |
| "learning_rate": 5.082523254950447e-07, | |
| "loss": 0.1194, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.8047799048896476, | |
| "grad_norm": 0.7964366205604348, | |
| "learning_rate": 5.033842373981429e-07, | |
| "loss": 0.1157, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.8057553956834532, | |
| "grad_norm": 0.8629927042044921, | |
| "learning_rate": 4.985369639066867e-07, | |
| "loss": 0.1231, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.8067308864772589, | |
| "grad_norm": 0.8130144409152532, | |
| "learning_rate": 4.937105555532252e-07, | |
| "loss": 0.1205, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.8077063772710645, | |
| "grad_norm": 0.7968686512185147, | |
| "learning_rate": 4.889050626527905e-07, | |
| "loss": 0.1222, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.8086818680648702, | |
| "grad_norm": 0.8084128367948819, | |
| "learning_rate": 4.841205353023715e-07, | |
| "loss": 0.1176, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.8096573588586757, | |
| "grad_norm": 0.8260965745640515, | |
| "learning_rate": 4.79357023380394e-07, | |
| "loss": 0.1168, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8106328496524814, | |
| "grad_norm": 0.8136591394426701, | |
| "learning_rate": 4.746145765461965e-07, | |
| "loss": 0.1246, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.811608340446287, | |
| "grad_norm": 0.84880591266123, | |
| "learning_rate": 4.6989324423951847e-07, | |
| "loss": 0.1207, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.8125838312400927, | |
| "grad_norm": 0.8381317559160933, | |
| "learning_rate": 4.651930756799794e-07, | |
| "loss": 0.1214, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.8135593220338984, | |
| "grad_norm": 0.8225053300538132, | |
| "learning_rate": 4.6051411986656775e-07, | |
| "loss": 0.1222, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.8145348128277039, | |
| "grad_norm": 0.7758201411310363, | |
| "learning_rate": 4.5585642557713083e-07, | |
| "loss": 0.1108, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8155103036215096, | |
| "grad_norm": 0.7841422014242995, | |
| "learning_rate": 4.512200413678672e-07, | |
| "loss": 0.1149, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.8164857944153152, | |
| "grad_norm": 0.8433260125398965, | |
| "learning_rate": 4.466050155728166e-07, | |
| "loss": 0.1246, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.8174612852091209, | |
| "grad_norm": 0.8004993062409861, | |
| "learning_rate": 4.420113963033607e-07, | |
| "loss": 0.1139, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.8184367760029265, | |
| "grad_norm": 0.8544261665582314, | |
| "learning_rate": 4.374392314477174e-07, | |
| "loss": 0.1442, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.8194122667967321, | |
| "grad_norm": 0.8269193033610706, | |
| "learning_rate": 4.3288856867044564e-07, | |
| "loss": 0.117, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8203877575905377, | |
| "grad_norm": 0.8056654304417562, | |
| "learning_rate": 4.2835945541194414e-07, | |
| "loss": 0.1106, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.8213632483843434, | |
| "grad_norm": 0.8225949604333195, | |
| "learning_rate": 4.238519388879614e-07, | |
| "loss": 0.118, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.822338739178149, | |
| "grad_norm": 0.8057418784933338, | |
| "learning_rate": 4.1936606608909887e-07, | |
| "loss": 0.1155, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.8233142299719547, | |
| "grad_norm": 0.8424817751997794, | |
| "learning_rate": 4.149018837803262e-07, | |
| "loss": 0.124, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.8242897207657602, | |
| "grad_norm": 0.7785385373841229, | |
| "learning_rate": 4.1045943850048843e-07, | |
| "loss": 0.1229, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.8252652115595659, | |
| "grad_norm": 0.8139230651458013, | |
| "learning_rate": 4.0603877656182373e-07, | |
| "loss": 0.1201, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.8262407023533715, | |
| "grad_norm": 0.8136291299903247, | |
| "learning_rate": 4.0163994404948144e-07, | |
| "loss": 0.1147, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.8272161931471772, | |
| "grad_norm": 0.7604644453749246, | |
| "learning_rate": 3.9726298682103923e-07, | |
| "loss": 0.1136, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.8281916839409829, | |
| "grad_norm": 0.8003636956695743, | |
| "learning_rate": 3.929079505060271e-07, | |
| "loss": 0.1104, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.8291671747347884, | |
| "grad_norm": 0.8051074779072083, | |
| "learning_rate": 3.8857488050544903e-07, | |
| "loss": 0.1196, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8301426655285941, | |
| "grad_norm": 0.8417299086430947, | |
| "learning_rate": 3.842638219913142e-07, | |
| "loss": 0.1187, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.8311181563223997, | |
| "grad_norm": 0.8473564595322601, | |
| "learning_rate": 3.799748199061612e-07, | |
| "loss": 0.1238, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.8320936471162054, | |
| "grad_norm": 0.8530209378089045, | |
| "learning_rate": 3.7570791896259147e-07, | |
| "loss": 0.1195, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.833069137910011, | |
| "grad_norm": 0.8683334198893611, | |
| "learning_rate": 3.7146316364280426e-07, | |
| "loss": 0.1202, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.8340446287038166, | |
| "grad_norm": 0.8185200067165666, | |
| "learning_rate": 3.6724059819813223e-07, | |
| "loss": 0.1244, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.8350201194976222, | |
| "grad_norm": 0.8871001344843794, | |
| "learning_rate": 3.630402666485783e-07, | |
| "loss": 0.1257, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.8359956102914279, | |
| "grad_norm": 0.8335712456953084, | |
| "learning_rate": 3.5886221278236045e-07, | |
| "loss": 0.117, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.8369711010852335, | |
| "grad_norm": 0.8724817716079213, | |
| "learning_rate": 3.547064801554509e-07, | |
| "loss": 0.1223, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.8379465918790392, | |
| "grad_norm": 0.8327646410251778, | |
| "learning_rate": 3.5057311209112625e-07, | |
| "loss": 0.1273, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.8389220826728447, | |
| "grad_norm": 0.8553723242208955, | |
| "learning_rate": 3.4646215167951166e-07, | |
| "loss": 0.1237, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8398975734666504, | |
| "grad_norm": 0.8667258133361339, | |
| "learning_rate": 3.4237364177713615e-07, | |
| "loss": 0.1194, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.840873064260456, | |
| "grad_norm": 0.775526945884146, | |
| "learning_rate": 3.383076250064815e-07, | |
| "loss": 0.111, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.8418485550542617, | |
| "grad_norm": 0.8350772764316703, | |
| "learning_rate": 3.342641437555411e-07, | |
| "loss": 0.1227, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.8428240458480674, | |
| "grad_norm": 0.8737189518152014, | |
| "learning_rate": 3.3024324017737555e-07, | |
| "loss": 0.1219, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.8437995366418729, | |
| "grad_norm": 0.8525338624818053, | |
| "learning_rate": 3.262449561896766e-07, | |
| "loss": 0.1208, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8447750274356786, | |
| "grad_norm": 0.7808415133904315, | |
| "learning_rate": 3.2226933347432516e-07, | |
| "loss": 0.1115, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.8457505182294842, | |
| "grad_norm": 0.8212690915534371, | |
| "learning_rate": 3.183164134769631e-07, | |
| "loss": 0.1192, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.8467260090232899, | |
| "grad_norm": 0.8432223045748855, | |
| "learning_rate": 3.143862374065548e-07, | |
| "loss": 0.1271, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.8477014998170955, | |
| "grad_norm": 0.809073364351975, | |
| "learning_rate": 3.104788462349612e-07, | |
| "loss": 0.1164, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.8486769906109011, | |
| "grad_norm": 0.8122295362266347, | |
| "learning_rate": 3.065942806965139e-07, | |
| "loss": 0.1166, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8496524814047067, | |
| "grad_norm": 0.8628135926038828, | |
| "learning_rate": 3.0273258128758585e-07, | |
| "loss": 0.1231, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.8506279721985124, | |
| "grad_norm": 0.9038737225256949, | |
| "learning_rate": 2.98893788266173e-07, | |
| "loss": 0.1269, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.851603462992318, | |
| "grad_norm": 0.8415109669107367, | |
| "learning_rate": 2.9507794165147487e-07, | |
| "loss": 0.1185, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.8525789537861237, | |
| "grad_norm": 0.8260696094654174, | |
| "learning_rate": 2.9128508122347324e-07, | |
| "loss": 0.1209, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.8535544445799292, | |
| "grad_norm": 0.7950123942247705, | |
| "learning_rate": 2.875152465225234e-07, | |
| "loss": 0.1107, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.8545299353737349, | |
| "grad_norm": 0.8036333920809307, | |
| "learning_rate": 2.837684768489354e-07, | |
| "loss": 0.1159, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.8555054261675406, | |
| "grad_norm": 0.8543568873961171, | |
| "learning_rate": 2.800448112625709e-07, | |
| "loss": 0.1278, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.8564809169613462, | |
| "grad_norm": 0.8705052724557507, | |
| "learning_rate": 2.7634428858242995e-07, | |
| "loss": 0.1244, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.8574564077551519, | |
| "grad_norm": 0.8648954628473264, | |
| "learning_rate": 2.7266694738625143e-07, | |
| "loss": 0.1134, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.8584318985489574, | |
| "grad_norm": 0.8569598558609475, | |
| "learning_rate": 2.690128260101069e-07, | |
| "loss": 0.1216, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8594073893427631, | |
| "grad_norm": 0.8615706510023747, | |
| "learning_rate": 2.6538196254800393e-07, | |
| "loss": 0.1282, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.8603828801365687, | |
| "grad_norm": 0.8654838887587664, | |
| "learning_rate": 2.617743948514867e-07, | |
| "loss": 0.1279, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.8613583709303744, | |
| "grad_norm": 0.8150117399404916, | |
| "learning_rate": 2.5819016052924224e-07, | |
| "loss": 0.1186, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.86233386172418, | |
| "grad_norm": 0.8042159227280485, | |
| "learning_rate": 2.5462929694670986e-07, | |
| "loss": 0.1162, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.8633093525179856, | |
| "grad_norm": 0.8098200477108216, | |
| "learning_rate": 2.5109184122568797e-07, | |
| "loss": 0.1079, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.8642848433117912, | |
| "grad_norm": 0.8037754437911059, | |
| "learning_rate": 2.4757783024395244e-07, | |
| "loss": 0.1168, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.8652603341055969, | |
| "grad_norm": 0.8174214190622696, | |
| "learning_rate": 2.44087300634866e-07, | |
| "loss": 0.1143, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.8662358248994025, | |
| "grad_norm": 0.9250267238278224, | |
| "learning_rate": 2.4062028878700074e-07, | |
| "loss": 0.1242, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.8672113156932082, | |
| "grad_norm": 0.843085805747338, | |
| "learning_rate": 2.3717683084375832e-07, | |
| "loss": 0.1179, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.8681868064870137, | |
| "grad_norm": 0.8349662619877875, | |
| "learning_rate": 2.3375696270299093e-07, | |
| "loss": 0.1193, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8691622972808194, | |
| "grad_norm": 0.7931357279276846, | |
| "learning_rate": 2.3036072001662829e-07, | |
| "loss": 0.1112, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.870137788074625, | |
| "grad_norm": 0.8058217864023394, | |
| "learning_rate": 2.2698813819030802e-07, | |
| "loss": 0.1128, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.8711132788684307, | |
| "grad_norm": 0.8105881503397407, | |
| "learning_rate": 2.2363925238300167e-07, | |
| "loss": 0.1186, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.8720887696622364, | |
| "grad_norm": 0.793047905194702, | |
| "learning_rate": 2.2031409750665422e-07, | |
| "loss": 0.1136, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.8730642604560419, | |
| "grad_norm": 0.887128059993288, | |
| "learning_rate": 2.170127082258147e-07, | |
| "loss": 0.1179, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.8740397512498476, | |
| "grad_norm": 0.7729380382439517, | |
| "learning_rate": 2.1373511895727866e-07, | |
| "loss": 0.1093, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.8750152420436532, | |
| "grad_norm": 0.8334653630393726, | |
| "learning_rate": 2.1048136386972645e-07, | |
| "loss": 0.1201, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.8759907328374589, | |
| "grad_norm": 0.8109427974591241, | |
| "learning_rate": 2.0725147688337054e-07, | |
| "loss": 0.1209, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.8769662236312645, | |
| "grad_norm": 0.8075287607010287, | |
| "learning_rate": 2.040454916695972e-07, | |
| "loss": 0.1104, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.8779417144250701, | |
| "grad_norm": 0.7907021456077222, | |
| "learning_rate": 2.0086344165062054e-07, | |
| "loss": 0.1186, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8789172052188757, | |
| "grad_norm": 0.8321531164794602, | |
| "learning_rate": 1.9770535999912967e-07, | |
| "loss": 0.1242, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.8798926960126814, | |
| "grad_norm": 0.836504402336581, | |
| "learning_rate": 1.945712796379462e-07, | |
| "loss": 0.1195, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.880868186806487, | |
| "grad_norm": 0.8414921499203574, | |
| "learning_rate": 1.914612332396787e-07, | |
| "loss": 0.1191, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.8818436776002927, | |
| "grad_norm": 0.8268050582099995, | |
| "learning_rate": 1.883752532263844e-07, | |
| "loss": 0.1212, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.8828191683940982, | |
| "grad_norm": 0.7804225339880899, | |
| "learning_rate": 1.8531337176922792e-07, | |
| "loss": 0.109, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.8837946591879039, | |
| "grad_norm": 0.8089563734380512, | |
| "learning_rate": 1.8227562078814903e-07, | |
| "loss": 0.1185, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.8847701499817096, | |
| "grad_norm": 0.8524343318848043, | |
| "learning_rate": 1.7926203195152898e-07, | |
| "loss": 0.1236, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.8857456407755152, | |
| "grad_norm": 0.8146203036325108, | |
| "learning_rate": 1.7627263667585882e-07, | |
| "loss": 0.1118, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.8867211315693209, | |
| "grad_norm": 0.8438170384778132, | |
| "learning_rate": 1.7330746612541387e-07, | |
| "loss": 0.1153, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.8876966223631264, | |
| "grad_norm": 0.816511650231362, | |
| "learning_rate": 1.7036655121192875e-07, | |
| "loss": 0.1262, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8886721131569321, | |
| "grad_norm": 0.8384874737504223, | |
| "learning_rate": 1.6744992259427272e-07, | |
| "loss": 0.1241, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.8896476039507377, | |
| "grad_norm": 0.8111140618017998, | |
| "learning_rate": 1.6455761067813352e-07, | |
| "loss": 0.1137, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.8906230947445434, | |
| "grad_norm": 0.8137411483354814, | |
| "learning_rate": 1.6168964561569716e-07, | |
| "loss": 0.1177, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.891598585538349, | |
| "grad_norm": 0.7826271884075486, | |
| "learning_rate": 1.5884605730533686e-07, | |
| "loss": 0.1173, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.8925740763321546, | |
| "grad_norm": 0.8815949613672592, | |
| "learning_rate": 1.5602687539129745e-07, | |
| "loss": 0.1224, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.8935495671259602, | |
| "grad_norm": 0.8365843017681764, | |
| "learning_rate": 1.5323212926339037e-07, | |
| "loss": 0.119, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.8945250579197659, | |
| "grad_norm": 0.7838035275941735, | |
| "learning_rate": 1.504618480566844e-07, | |
| "loss": 0.1137, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.8955005487135715, | |
| "grad_norm": 0.8571652908859096, | |
| "learning_rate": 1.4771606065120293e-07, | |
| "loss": 0.1176, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.8964760395073772, | |
| "grad_norm": 0.8042632494131905, | |
| "learning_rate": 1.4499479567162328e-07, | |
| "loss": 0.1193, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.8974515303011827, | |
| "grad_norm": 0.8620451657826478, | |
| "learning_rate": 1.4229808148697732e-07, | |
| "loss": 0.1245, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8984270210949884, | |
| "grad_norm": 0.8376113680024069, | |
| "learning_rate": 1.396259462103572e-07, | |
| "loss": 0.1101, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.899402511888794, | |
| "grad_norm": 0.8311869479243955, | |
| "learning_rate": 1.3697841769861996e-07, | |
| "loss": 0.1198, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.9003780026825997, | |
| "grad_norm": 0.869133372824549, | |
| "learning_rate": 1.3435552355210018e-07, | |
| "loss": 0.1172, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.9013534934764054, | |
| "grad_norm": 0.8549023606786766, | |
| "learning_rate": 1.3175729111431946e-07, | |
| "loss": 0.1248, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.9023289842702109, | |
| "grad_norm": 0.8266104807259295, | |
| "learning_rate": 1.2918374747170225e-07, | |
| "loss": 0.1157, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.9033044750640166, | |
| "grad_norm": 0.8725974407693242, | |
| "learning_rate": 1.266349194532951e-07, | |
| "loss": 0.1212, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.9042799658578222, | |
| "grad_norm": 0.8869251827136297, | |
| "learning_rate": 1.2411083363048386e-07, | |
| "loss": 0.1168, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.9052554566516279, | |
| "grad_norm": 0.838978233063295, | |
| "learning_rate": 1.2161151631671974e-07, | |
| "loss": 0.1193, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.9062309474454335, | |
| "grad_norm": 0.8158933020010286, | |
| "learning_rate": 1.1913699356724317e-07, | |
| "loss": 0.1139, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.9072064382392391, | |
| "grad_norm": 0.8302262653149984, | |
| "learning_rate": 1.166872911788125e-07, | |
| "loss": 0.1193, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9081819290330447, | |
| "grad_norm": 0.8281059144615461, | |
| "learning_rate": 1.1426243468943582e-07, | |
| "loss": 0.1218, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.9091574198268504, | |
| "grad_norm": 0.8150387396832428, | |
| "learning_rate": 1.1186244937810315e-07, | |
| "loss": 0.1133, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.910132910620656, | |
| "grad_norm": 0.8174814241523217, | |
| "learning_rate": 1.0948736026452495e-07, | |
| "loss": 0.1111, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.9111084014144617, | |
| "grad_norm": 0.8072685450600614, | |
| "learning_rate": 1.071371921088693e-07, | |
| "loss": 0.1219, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.9120838922082672, | |
| "grad_norm": 0.8462154845847925, | |
| "learning_rate": 1.0481196941150574e-07, | |
| "loss": 0.1131, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.9130593830020729, | |
| "grad_norm": 0.8194674338836323, | |
| "learning_rate": 1.0251171641274788e-07, | |
| "loss": 0.1208, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.9140348737958786, | |
| "grad_norm": 0.8051460380945417, | |
| "learning_rate": 1.0023645709260233e-07, | |
| "loss": 0.111, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.9150103645896842, | |
| "grad_norm": 0.7953737847156309, | |
| "learning_rate": 9.798621517051716e-08, | |
| "loss": 0.1134, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.9159858553834899, | |
| "grad_norm": 0.8137082279917823, | |
| "learning_rate": 9.576101410513655e-08, | |
| "loss": 0.1184, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.9169613461772954, | |
| "grad_norm": 0.81376889042028, | |
| "learning_rate": 9.356087709405465e-08, | |
| "loss": 0.1141, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9179368369711011, | |
| "grad_norm": 0.8493873934604731, | |
| "learning_rate": 9.138582707357429e-08, | |
| "loss": 0.1178, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.9189123277649067, | |
| "grad_norm": 0.8480580946913866, | |
| "learning_rate": 8.923588671846784e-08, | |
| "loss": 0.119, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.9198878185587124, | |
| "grad_norm": 0.8259767599447636, | |
| "learning_rate": 8.711107844174089e-08, | |
| "loss": 0.1224, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.920863309352518, | |
| "grad_norm": 0.8414744578683574, | |
| "learning_rate": 8.501142439439808e-08, | |
| "loss": 0.1184, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.9218388001463236, | |
| "grad_norm": 0.7500528197265454, | |
| "learning_rate": 8.29369464652141e-08, | |
| "loss": 0.0985, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.9228142909401292, | |
| "grad_norm": 0.8122867841326354, | |
| "learning_rate": 8.088766628050193e-08, | |
| "loss": 0.1102, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.9237897817339349, | |
| "grad_norm": 0.8378630528612887, | |
| "learning_rate": 7.886360520389158e-08, | |
| "loss": 0.1187, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.9247652725277405, | |
| "grad_norm": 0.7993318052605702, | |
| "learning_rate": 7.686478433610339e-08, | |
| "loss": 0.113, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.9257407633215462, | |
| "grad_norm": 0.8282335985098225, | |
| "learning_rate": 7.48912245147318e-08, | |
| "loss": 0.1183, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.9267162541153517, | |
| "grad_norm": 0.8238348823155386, | |
| "learning_rate": 7.294294631402493e-08, | |
| "loss": 0.1202, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9276917449091574, | |
| "grad_norm": 0.8024852399758585, | |
| "learning_rate": 7.101997004467203e-08, | |
| "loss": 0.122, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.9286672357029631, | |
| "grad_norm": 0.7847553156886314, | |
| "learning_rate": 6.912231575359057e-08, | |
| "loss": 0.1104, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.9296427264967687, | |
| "grad_norm": 0.8975888719583766, | |
| "learning_rate": 6.725000322371916e-08, | |
| "loss": 0.1186, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.9306182172905744, | |
| "grad_norm": 0.8212833223497472, | |
| "learning_rate": 6.540305197380859e-08, | |
| "loss": 0.122, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.9315937080843799, | |
| "grad_norm": 0.7982479941876005, | |
| "learning_rate": 6.358148125822e-08, | |
| "loss": 0.1136, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.9325691988781856, | |
| "grad_norm": 0.8592832767848003, | |
| "learning_rate": 6.178531006672484e-08, | |
| "loss": 0.1276, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.9335446896719912, | |
| "grad_norm": 0.8873811445927061, | |
| "learning_rate": 6.001455712430492e-08, | |
| "loss": 0.126, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.9345201804657969, | |
| "grad_norm": 0.8556443661624066, | |
| "learning_rate": 5.826924089095881e-08, | |
| "loss": 0.1176, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.9354956712596025, | |
| "grad_norm": 0.7924528652097824, | |
| "learning_rate": 5.654937956150852e-08, | |
| "loss": 0.1208, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.9364711620534081, | |
| "grad_norm": 0.8207355407913433, | |
| "learning_rate": 5.4854991065410866e-08, | |
| "loss": 0.1236, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9374466528472137, | |
| "grad_norm": 0.823166152063427, | |
| "learning_rate": 5.3186093066568965e-08, | |
| "loss": 0.1195, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.9384221436410194, | |
| "grad_norm": 0.9014815011677116, | |
| "learning_rate": 5.154270296314878e-08, | |
| "loss": 0.1317, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.939397634434825, | |
| "grad_norm": 0.8362981567362682, | |
| "learning_rate": 4.992483788739927e-08, | |
| "loss": 0.1208, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.9403731252286307, | |
| "grad_norm": 0.840511469516904, | |
| "learning_rate": 4.833251470547084e-08, | |
| "loss": 0.1293, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.9413486160224362, | |
| "grad_norm": 0.831812913196967, | |
| "learning_rate": 4.6765750017242206e-08, | |
| "loss": 0.1217, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.9423241068162419, | |
| "grad_norm": 0.8367591325240412, | |
| "learning_rate": 4.5224560156145734e-08, | |
| "loss": 0.118, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.9432995976100476, | |
| "grad_norm": 0.879426458908269, | |
| "learning_rate": 4.370896118899792e-08, | |
| "loss": 0.1145, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.9442750884038532, | |
| "grad_norm": 0.8098437070862734, | |
| "learning_rate": 4.221896891583144e-08, | |
| "loss": 0.1144, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.9452505791976589, | |
| "grad_norm": 0.7667200836569177, | |
| "learning_rate": 4.0754598869730824e-08, | |
| "loss": 0.11, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.9462260699914644, | |
| "grad_norm": 0.8556949255820121, | |
| "learning_rate": 3.931586631667039e-08, | |
| "loss": 0.1246, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9472015607852701, | |
| "grad_norm": 0.8278433518221362, | |
| "learning_rate": 3.7902786255354927e-08, | |
| "loss": 0.1244, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.9481770515790757, | |
| "grad_norm": 0.8279334840526833, | |
| "learning_rate": 3.65153734170634e-08, | |
| "loss": 0.1167, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.9491525423728814, | |
| "grad_norm": 0.8237905685355518, | |
| "learning_rate": 3.515364226549523e-08, | |
| "loss": 0.1205, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.950128033166687, | |
| "grad_norm": 0.8493136167563948, | |
| "learning_rate": 3.381760699662062e-08, | |
| "loss": 0.1223, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.9511035239604926, | |
| "grad_norm": 0.8361911343136549, | |
| "learning_rate": 3.25072815385305e-08, | |
| "loss": 0.1161, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.9520790147542982, | |
| "grad_norm": 0.80954482668594, | |
| "learning_rate": 3.1222679551293486e-08, | |
| "loss": 0.1129, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.9530545055481039, | |
| "grad_norm": 0.82063025704283, | |
| "learning_rate": 2.996381442681162e-08, | |
| "loss": 0.1191, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.9540299963419095, | |
| "grad_norm": 0.828098268541136, | |
| "learning_rate": 2.8730699288682107e-08, | |
| "loss": 0.1262, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.9550054871357152, | |
| "grad_norm": 0.8093239279254736, | |
| "learning_rate": 2.7523346992060217e-08, | |
| "loss": 0.1159, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.9559809779295207, | |
| "grad_norm": 0.8448131471914473, | |
| "learning_rate": 2.634177012352468e-08, | |
| "loss": 0.1181, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.9569564687233264, | |
| "grad_norm": 0.850456037528909, | |
| "learning_rate": 2.5185981000946657e-08, | |
| "loss": 0.1232, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.9579319595171321, | |
| "grad_norm": 0.7998292649044384, | |
| "learning_rate": 2.405599167336209e-08, | |
| "loss": 0.1152, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.9589074503109377, | |
| "grad_norm": 0.8711285830989083, | |
| "learning_rate": 2.295181392084511e-08, | |
| "loss": 0.1192, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.9598829411047434, | |
| "grad_norm": 0.831799017458, | |
| "learning_rate": 2.187345925438594e-08, | |
| "loss": 0.1143, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.9608584318985489, | |
| "grad_norm": 0.8032074365365065, | |
| "learning_rate": 2.0820938915770417e-08, | |
| "loss": 0.1105, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.9618339226923546, | |
| "grad_norm": 0.7978427999769494, | |
| "learning_rate": 1.9794263877463138e-08, | |
| "loss": 0.1061, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.9628094134861602, | |
| "grad_norm": 0.8519627713839724, | |
| "learning_rate": 1.879344484249257e-08, | |
| "loss": 0.1229, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.9637849042799659, | |
| "grad_norm": 0.8131083664763187, | |
| "learning_rate": 1.781849224434029e-08, | |
| "loss": 0.1178, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.9647603950737715, | |
| "grad_norm": 0.8711093922420086, | |
| "learning_rate": 1.686941624683164e-08, | |
| "loss": 0.1268, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.9657358858675771, | |
| "grad_norm": 0.8153485749863514, | |
| "learning_rate": 1.5946226744029402e-08, | |
| "loss": 0.1139, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.9667113766613827, | |
| "grad_norm": 0.8470338609149345, | |
| "learning_rate": 1.504893336013169e-08, | |
| "loss": 0.117, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.9676868674551884, | |
| "grad_norm": 0.8952280575519709, | |
| "learning_rate": 1.4177545449370889e-08, | |
| "loss": 0.1193, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.968662358248994, | |
| "grad_norm": 0.8393629890478395, | |
| "learning_rate": 1.3332072095916527e-08, | |
| "loss": 0.1148, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.9696378490427997, | |
| "grad_norm": 0.8484999365301013, | |
| "learning_rate": 1.2512522113779235e-08, | |
| "loss": 0.1212, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.9706133398366052, | |
| "grad_norm": 0.8047573717380992, | |
| "learning_rate": 1.1718904046721657e-08, | |
| "loss": 0.1169, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.9715888306304109, | |
| "grad_norm": 0.7801955720110736, | |
| "learning_rate": 1.0951226168166574e-08, | |
| "loss": 0.1158, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.9725643214242166, | |
| "grad_norm": 0.8469472102608405, | |
| "learning_rate": 1.0209496481112247e-08, | |
| "loss": 0.119, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.9735398122180222, | |
| "grad_norm": 0.7899772186862825, | |
| "learning_rate": 9.493722718048326e-09, | |
| "loss": 0.108, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.9745153030118279, | |
| "grad_norm": 0.8122633623112819, | |
| "learning_rate": 8.803912340875076e-09, | |
| "loss": 0.1149, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.9754907938056334, | |
| "grad_norm": 0.861388140402043, | |
| "learning_rate": 8.140072540826772e-09, | |
| "loss": 0.1229, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9754907938056334, | |
| "eval_loss": 0.11711173504590988, | |
| "eval_runtime": 127.3978, | |
| "eval_samples_per_second": 5.204, | |
| "eval_steps_per_second": 0.652, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9764662845994391, | |
| "grad_norm": 0.795759380098638, | |
| "learning_rate": 7.502210238395091e-09, | |
| "loss": 0.1202, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.9774417753932447, | |
| "grad_norm": 0.7911131054538534, | |
| "learning_rate": 6.890332083258622e-09, | |
| "loss": 0.1206, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.9784172661870504, | |
| "grad_norm": 0.8225435058710419, | |
| "learning_rate": 6.304444454212077e-09, | |
| "loss": 0.1253, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.979392756980856, | |
| "grad_norm": 0.7897526541638892, | |
| "learning_rate": 5.7445534591002435e-09, | |
| "loss": 0.115, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.9803682477746616, | |
| "grad_norm": 0.8241723887349869, | |
| "learning_rate": 5.210664934754972e-09, | |
| "loss": 0.1248, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.9813437385684672, | |
| "grad_norm": 0.8404643550865318, | |
| "learning_rate": 4.702784446934116e-09, | |
| "loss": 0.1185, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.9823192293622729, | |
| "grad_norm": 0.8329025507816016, | |
| "learning_rate": 4.220917290262139e-09, | |
| "loss": 0.1221, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.9832947201560785, | |
| "grad_norm": 0.8272251709257826, | |
| "learning_rate": 3.765068488177093e-09, | |
| "loss": 0.1243, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.9842702109498842, | |
| "grad_norm": 0.8289663001334425, | |
| "learning_rate": 3.335242792876503e-09, | |
| "loss": 0.1121, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.9852457017436898, | |
| "grad_norm": 0.8935773554975892, | |
| "learning_rate": 2.931444685269069e-09, | |
| "loss": 0.1296, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.9862211925374954, | |
| "grad_norm": 0.8147353591204097, | |
| "learning_rate": 2.553678374926649e-09, | |
| "loss": 0.1197, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.9871966833313011, | |
| "grad_norm": 0.8356734189663587, | |
| "learning_rate": 2.201947800041515e-09, | |
| "loss": 0.1233, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.9881721741251067, | |
| "grad_norm": 0.8277826045315564, | |
| "learning_rate": 1.8762566273852754e-09, | |
| "loss": 0.1102, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.9891476649189124, | |
| "grad_norm": 0.837735922536016, | |
| "learning_rate": 1.576608252269185e-09, | |
| "loss": 0.1282, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.9901231557127179, | |
| "grad_norm": 0.8185965403465947, | |
| "learning_rate": 1.3030057985108368e-09, | |
| "loss": 0.1184, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.9910986465065236, | |
| "grad_norm": 0.7801036446264883, | |
| "learning_rate": 1.055452118400302e-09, | |
| "loss": 0.111, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.9920741373003292, | |
| "grad_norm": 0.7975320236568699, | |
| "learning_rate": 8.339497926704299e-10, | |
| "loss": 0.114, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.9930496280941349, | |
| "grad_norm": 0.8525955644349905, | |
| "learning_rate": 6.385011304704814e-10, | |
| "loss": 0.1257, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.9940251188879405, | |
| "grad_norm": 0.8091050888964465, | |
| "learning_rate": 4.691081693411481e-10, | |
| "loss": 0.1147, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.9950006096817461, | |
| "grad_norm": 0.9205192964480091, | |
| "learning_rate": 3.2577267519484645e-10, | |
| "loss": 0.1257, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.9959761004755517, | |
| "grad_norm": 0.816877602174351, | |
| "learning_rate": 2.0849614229601078e-10, | |
| "loss": 0.1145, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.9969515912693574, | |
| "grad_norm": 0.8091963243679661, | |
| "learning_rate": 1.1727979324527294e-10, | |
| "loss": 0.1185, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.997927082063163, | |
| "grad_norm": 0.8922096606338227, | |
| "learning_rate": 5.212457896835998e-11, | |
| "loss": 0.118, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.9989025728569687, | |
| "grad_norm": 0.7963658344295353, | |
| "learning_rate": 1.303117870443682e-11, | |
| "loss": 0.118, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.9998780636507743, | |
| "grad_norm": 0.832964772750828, | |
| "learning_rate": 0.0, | |
| "loss": 0.1208, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.9998780636507743, | |
| "step": 1025, | |
| "total_flos": 241738000957440.0, | |
| "train_loss": 0.1467944400630346, | |
| "train_runtime": 47673.2991, | |
| "train_samples_per_second": 1.376, | |
| "train_steps_per_second": 0.022 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1025, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 241738000957440.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |