{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.017775776357032393,
  "eval_steps": 500,
  "global_step": 12000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.9626293928387323e-05,
      "grad_norm": 664.0,
      "learning_rate": 1.9e-05,
      "loss": 203.5701,
      "step": 20
    },
    {
      "epoch": 5.9252587856774646e-05,
      "grad_norm": 70.5,
      "learning_rate": 3.9e-05,
      "loss": 152.8923,
      "step": 40
    },
    {
      "epoch": 8.887888178516196e-05,
      "grad_norm": 103.5,
      "learning_rate": 5.9e-05,
      "loss": 143.9512,
      "step": 60
    },
    {
      "epoch": 0.00011850517571354929,
      "grad_norm": 74.5,
      "learning_rate": 7.9e-05,
      "loss": 141.3113,
      "step": 80
    },
    {
      "epoch": 0.0001481314696419366,
      "grad_norm": 160.0,
      "learning_rate": 9.900000000000001e-05,
      "loss": 138.4991,
      "step": 100
    },
    {
      "epoch": 0.00017775776357032393,
      "grad_norm": 227.0,
      "learning_rate": 0.00011899999999999999,
      "loss": 136.2222,
      "step": 120
    },
    {
      "epoch": 0.00020738405749871127,
      "grad_norm": 186.0,
      "learning_rate": 0.00013900000000000002,
      "loss": 133.6774,
      "step": 140
    },
    {
      "epoch": 0.00023701035142709859,
      "grad_norm": 65.5,
      "learning_rate": 0.00015900000000000002,
      "loss": 130.8263,
      "step": 160
    },
    {
      "epoch": 0.0002666366453554859,
      "grad_norm": 170.0,
      "learning_rate": 0.000179,
      "loss": 127.1942,
      "step": 180
    },
    {
      "epoch": 0.0002962629392838732,
      "grad_norm": 143.0,
      "learning_rate": 0.000199,
      "loss": 122.895,
      "step": 200
    },
    {
      "epoch": 0.00032588923321226053,
      "grad_norm": 109.0,
      "learning_rate": 0.000219,
      "loss": 119.3099,
      "step": 220
    },
    {
      "epoch": 0.00035551552714064785,
      "grad_norm": 207.0,
      "learning_rate": 0.00023899999999999998,
      "loss": 115.1136,
      "step": 240
    },
    {
      "epoch": 0.00038514182106903517,
      "grad_norm": 192.0,
      "learning_rate": 0.000259,
      "loss": 110.9598,
      "step": 260
    },
    {
      "epoch": 0.00041476811499742254,
      "grad_norm": 81.0,
      "learning_rate": 0.000279,
      "loss": 107.5745,
      "step": 280
    },
    {
      "epoch": 0.00044439440892580985,
      "grad_norm": 66.0,
      "learning_rate": 0.000299,
      "loss": 103.9611,
      "step": 300
    },
    {
      "epoch": 0.00047402070285419717,
      "grad_norm": 77.0,
      "learning_rate": 0.000319,
      "loss": 100.1696,
      "step": 320
    },
    {
      "epoch": 0.0005036469967825845,
      "grad_norm": 76.5,
      "learning_rate": 0.00033900000000000005,
      "loss": 96.3467,
      "step": 340
    },
    {
      "epoch": 0.0005332732907109718,
      "grad_norm": 70.0,
      "learning_rate": 0.000359,
      "loss": 93.3187,
      "step": 360
    },
    {
      "epoch": 0.0005628995846393591,
      "grad_norm": 81.0,
      "learning_rate": 0.000379,
      "loss": 90.8416,
      "step": 380
    },
    {
      "epoch": 0.0005925258785677464,
      "grad_norm": 146.0,
      "learning_rate": 0.00039900000000000005,
      "loss": 88.6177,
      "step": 400
    },
    {
      "epoch": 0.0006221521724961338,
      "grad_norm": 83.5,
      "learning_rate": 0.000419,
      "loss": 86.5326,
      "step": 420
    },
    {
      "epoch": 0.0006517784664245211,
      "grad_norm": 99.5,
      "learning_rate": 0.000439,
      "loss": 84.3083,
      "step": 440
    },
    {
      "epoch": 0.0006814047603529084,
      "grad_norm": 109.5,
      "learning_rate": 0.00045900000000000004,
      "loss": 81.6188,
      "step": 460
    },
    {
      "epoch": 0.0007110310542812957,
      "grad_norm": 82.5,
      "learning_rate": 0.000479,
      "loss": 79.7902,
      "step": 480
    },
    {
      "epoch": 0.000740657348209683,
      "grad_norm": 67.0,
      "learning_rate": 0.000499,
      "loss": 77.6862,
      "step": 500
    },
    {
      "epoch": 0.0007702836421380703,
      "grad_norm": 95.0,
      "learning_rate": 0.0004999953080117428,
      "loss": 75.2466,
      "step": 520
    },
    {
      "epoch": 0.0007999099360664577,
      "grad_norm": 96.0,
      "learning_rate": 0.0004999903690767353,
      "loss": 73.0597,
      "step": 540
    },
    {
      "epoch": 0.0008295362299948451,
      "grad_norm": 96.5,
      "learning_rate": 0.0004999854301417277,
      "loss": 71.2278,
      "step": 560
    },
    {
      "epoch": 0.0008591625239232324,
      "grad_norm": 88.0,
      "learning_rate": 0.0004999804912067202,
      "loss": 69.239,
      "step": 580
    },
    {
      "epoch": 0.0008887888178516197,
      "grad_norm": 82.0,
      "learning_rate": 0.0004999755522717126,
      "loss": 67.4827,
      "step": 600
    },
    {
      "epoch": 0.000918415111780007,
      "grad_norm": 55.75,
      "learning_rate": 0.000499970613336705,
      "loss": 65.9883,
      "step": 620
    },
    {
      "epoch": 0.0009480414057083943,
      "grad_norm": 64.5,
      "learning_rate": 0.0004999656744016975,
      "loss": 64.7352,
      "step": 640
    },
    {
      "epoch": 0.0009776676996367817,
      "grad_norm": 66.5,
      "learning_rate": 0.0004999607354666899,
      "loss": 63.1578,
      "step": 660
    },
    {
      "epoch": 0.001007293993565169,
      "grad_norm": 80.0,
      "learning_rate": 0.0004999557965316823,
      "loss": 62.002,
      "step": 680
    },
    {
      "epoch": 0.0010369202874935563,
      "grad_norm": 79.0,
      "learning_rate": 0.0004999508575966748,
      "loss": 61.0053,
      "step": 700
    },
    {
      "epoch": 0.0010665465814219436,
      "grad_norm": 63.75,
      "learning_rate": 0.0004999459186616671,
      "loss": 59.9261,
      "step": 720
    },
    {
      "epoch": 0.001096172875350331,
      "grad_norm": 58.5,
      "learning_rate": 0.0004999409797266595,
      "loss": 59.0368,
      "step": 740
    },
    {
      "epoch": 0.0011257991692787182,
      "grad_norm": 71.5,
      "learning_rate": 0.000499936040791652,
      "loss": 58.2665,
      "step": 760
    },
    {
      "epoch": 0.0011554254632071056,
      "grad_norm": 43.75,
      "learning_rate": 0.0004999311018566444,
      "loss": 57.2501,
      "step": 780
    },
    {
      "epoch": 0.0011850517571354929,
      "grad_norm": 68.0,
      "learning_rate": 0.0004999261629216369,
      "loss": 56.5027,
      "step": 800
    },
    {
      "epoch": 0.0012146780510638802,
      "grad_norm": 54.0,
      "learning_rate": 0.0004999212239866293,
      "loss": 55.9271,
      "step": 820
    },
    {
      "epoch": 0.0012443043449922675,
      "grad_norm": 60.0,
      "learning_rate": 0.0004999162850516217,
      "loss": 55.3761,
      "step": 840
    },
    {
      "epoch": 0.0012739306389206548,
      "grad_norm": 55.5,
      "learning_rate": 0.0004999113461166142,
      "loss": 54.6099,
      "step": 860
    },
    {
      "epoch": 0.0013035569328490421,
      "grad_norm": 89.0,
      "learning_rate": 0.0004999064071816066,
      "loss": 53.9408,
      "step": 880
    },
    {
      "epoch": 0.0013331832267774295,
      "grad_norm": 45.25,
      "learning_rate": 0.000499901468246599,
      "loss": 53.4026,
      "step": 900
    },
    {
      "epoch": 0.0013628095207058168,
      "grad_norm": 50.25,
      "learning_rate": 0.0004998965293115915,
      "loss": 52.993,
      "step": 920
    },
    {
      "epoch": 0.001392435814634204,
      "grad_norm": 59.0,
      "learning_rate": 0.0004998915903765839,
      "loss": 52.3891,
      "step": 940
    },
    {
      "epoch": 0.0014220621085625914,
      "grad_norm": 61.0,
      "learning_rate": 0.0004998866514415763,
      "loss": 51.9013,
      "step": 960
    },
    {
      "epoch": 0.0014516884024909787,
      "grad_norm": 69.5,
      "learning_rate": 0.0004998817125065688,
      "loss": 51.5375,
      "step": 980
    },
    {
      "epoch": 0.001481314696419366,
      "grad_norm": 61.5,
      "learning_rate": 0.0004998767735715612,
      "loss": 50.8708,
      "step": 1000
    },
    {
      "epoch": 0.0015109409903477534,
      "grad_norm": 57.0,
      "learning_rate": 0.0004998718346365537,
      "loss": 50.515,
      "step": 1020
    },
    {
      "epoch": 0.0015405672842761407,
      "grad_norm": 56.75,
      "learning_rate": 0.0004998668957015461,
      "loss": 50.2177,
      "step": 1040
    },
    {
      "epoch": 0.001570193578204528,
      "grad_norm": 39.25,
      "learning_rate": 0.0004998619567665385,
      "loss": 49.6777,
      "step": 1060
    },
    {
      "epoch": 0.0015998198721329153,
      "grad_norm": 54.0,
      "learning_rate": 0.000499857017831531,
      "loss": 49.4364,
      "step": 1080
    },
    {
      "epoch": 0.0016294461660613028,
      "grad_norm": 53.75,
      "learning_rate": 0.0004998520788965234,
      "loss": 49.0852,
      "step": 1100
    },
    {
      "epoch": 0.0016590724599896902,
      "grad_norm": 37.75,
      "learning_rate": 0.0004998471399615158,
      "loss": 48.7451,
      "step": 1120
    },
    {
      "epoch": 0.0016886987539180775,
      "grad_norm": 44.0,
      "learning_rate": 0.0004998422010265083,
      "loss": 48.4963,
      "step": 1140
    },
    {
      "epoch": 0.0017183250478464648,
      "grad_norm": 64.5,
      "learning_rate": 0.0004998372620915007,
      "loss": 47.9198,
      "step": 1160
    },
    {
      "epoch": 0.001747951341774852,
      "grad_norm": 35.25,
      "learning_rate": 0.0004998323231564932,
      "loss": 47.7004,
      "step": 1180
    },
    {
      "epoch": 0.0017775776357032394,
      "grad_norm": 45.0,
      "learning_rate": 0.0004998273842214856,
      "loss": 47.231,
      "step": 1200
    },
    {
      "epoch": 0.0018072039296316267,
      "grad_norm": 65.0,
      "learning_rate": 0.0004998224452864781,
      "loss": 47.0502,
      "step": 1220
    },
    {
      "epoch": 0.001836830223560014,
      "grad_norm": 43.25,
      "learning_rate": 0.0004998175063514705,
      "loss": 46.6445,
      "step": 1240
    },
    {
      "epoch": 0.0018664565174884014,
      "grad_norm": 37.75,
      "learning_rate": 0.0004998125674164629,
      "loss": 46.3884,
      "step": 1260
    },
    {
      "epoch": 0.0018960828114167887,
      "grad_norm": 85.0,
      "learning_rate": 0.0004998076284814553,
      "loss": 46.167,
      "step": 1280
    },
    {
      "epoch": 0.001925709105345176,
      "grad_norm": 40.25,
      "learning_rate": 0.0004998026895464478,
      "loss": 45.9901,
      "step": 1300
    },
    {
      "epoch": 0.0019553353992735633,
      "grad_norm": 42.5,
      "learning_rate": 0.0004997977506114401,
      "loss": 45.7832,
      "step": 1320
    },
    {
      "epoch": 0.0019849616932019506,
      "grad_norm": 48.0,
      "learning_rate": 0.0004997928116764325,
      "loss": 45.4071,
      "step": 1340
    },
    {
      "epoch": 0.002014587987130338,
      "grad_norm": 49.0,
      "learning_rate": 0.000499787872741425,
      "loss": 45.1021,
      "step": 1360
    },
    {
      "epoch": 0.0020442142810587253,
      "grad_norm": 52.5,
      "learning_rate": 0.0004997829338064174,
      "loss": 44.8783,
      "step": 1380
    },
    {
      "epoch": 0.0020738405749871126,
      "grad_norm": 43.0,
      "learning_rate": 0.0004997779948714099,
      "loss": 44.6055,
      "step": 1400
    },
    {
      "epoch": 0.0021034668689155,
      "grad_norm": 44.25,
      "learning_rate": 0.0004997730559364023,
      "loss": 44.3711,
      "step": 1420
    },
    {
      "epoch": 0.002133093162843887,
      "grad_norm": 50.75,
      "learning_rate": 0.0004997681170013947,
      "loss": 44.3173,
      "step": 1440
    },
    {
      "epoch": 0.0021627194567722745,
      "grad_norm": 55.5,
      "learning_rate": 0.0004997631780663872,
      "loss": 43.861,
      "step": 1460
    },
    {
      "epoch": 0.002192345750700662,
      "grad_norm": 43.5,
      "learning_rate": 0.0004997582391313796,
      "loss": 43.7341,
      "step": 1480
    },
    {
      "epoch": 0.002221972044629049,
      "grad_norm": 44.75,
      "learning_rate": 0.000499753300196372,
      "loss": 43.6567,
      "step": 1500
    },
    {
      "epoch": 0.0022515983385574365,
      "grad_norm": 53.25,
      "learning_rate": 0.0004997483612613645,
      "loss": 43.2165,
      "step": 1520
    },
    {
      "epoch": 0.002281224632485824,
      "grad_norm": 58.5,
      "learning_rate": 0.0004997434223263569,
      "loss": 43.1527,
      "step": 1540
    },
    {
      "epoch": 0.002310850926414211,
      "grad_norm": 41.0,
      "learning_rate": 0.0004997384833913494,
      "loss": 42.8711,
      "step": 1560
    },
    {
      "epoch": 0.0023404772203425984,
      "grad_norm": 37.75,
      "learning_rate": 0.0004997335444563418,
      "loss": 42.6794,
      "step": 1580
    },
    {
      "epoch": 0.0023701035142709857,
      "grad_norm": 39.0,
      "learning_rate": 0.0004997286055213343,
      "loss": 42.4436,
      "step": 1600
    },
    {
      "epoch": 0.002399729808199373,
      "grad_norm": 42.25,
      "learning_rate": 0.0004997236665863267,
      "loss": 42.3047,
      "step": 1620
    },
    {
      "epoch": 0.0024293561021277604,
      "grad_norm": 34.5,
      "learning_rate": 0.0004997187276513191,
      "loss": 42.0222,
      "step": 1640
    },
    {
      "epoch": 0.0024589823960561477,
      "grad_norm": 43.75,
      "learning_rate": 0.0004997137887163115,
      "loss": 42.0059,
      "step": 1660
    },
    {
      "epoch": 0.002488608689984535,
      "grad_norm": 42.75,
      "learning_rate": 0.000499708849781304,
      "loss": 41.7808,
      "step": 1680
    },
    {
      "epoch": 0.0025182349839129223,
      "grad_norm": 46.75,
      "learning_rate": 0.0004997039108462964,
      "loss": 41.4766,
      "step": 1700
    },
    {
      "epoch": 0.0025478612778413096,
      "grad_norm": 40.25,
      "learning_rate": 0.0004996989719112888,
      "loss": 41.4283,
      "step": 1720
    },
    {
      "epoch": 0.002577487571769697,
      "grad_norm": 43.25,
      "learning_rate": 0.0004996940329762813,
      "loss": 41.2702,
      "step": 1740
    },
    {
      "epoch": 0.0026071138656980843,
      "grad_norm": 36.25,
      "learning_rate": 0.0004996890940412737,
      "loss": 41.0024,
      "step": 1760
    },
    {
      "epoch": 0.0026367401596264716,
      "grad_norm": 42.5,
      "learning_rate": 0.0004996841551062662,
      "loss": 40.9728,
      "step": 1780
    },
    {
      "epoch": 0.002666366453554859,
      "grad_norm": 42.25,
      "learning_rate": 0.0004996792161712586,
      "loss": 40.778,
      "step": 1800
    },
    {
      "epoch": 0.0026959927474832462,
      "grad_norm": 38.0,
      "learning_rate": 0.0004996742772362511,
      "loss": 40.5235,
      "step": 1820
    },
    {
      "epoch": 0.0027256190414116335,
      "grad_norm": 51.5,
      "learning_rate": 0.0004996693383012435,
      "loss": 40.4312,
      "step": 1840
    },
    {
      "epoch": 0.002755245335340021,
      "grad_norm": 65.5,
      "learning_rate": 0.0004996643993662359,
      "loss": 40.3023,
      "step": 1860
    },
    {
      "epoch": 0.002784871629268408,
      "grad_norm": 38.5,
      "learning_rate": 0.0004996594604312283,
      "loss": 40.2053,
      "step": 1880
    },
    {
      "epoch": 0.0028144979231967955,
      "grad_norm": 48.0,
      "learning_rate": 0.0004996545214962208,
      "loss": 39.8467,
      "step": 1900
    },
    {
      "epoch": 0.002844124217125183,
      "grad_norm": 38.5,
      "learning_rate": 0.0004996495825612131,
      "loss": 39.7633,
      "step": 1920
    },
    {
      "epoch": 0.00287375051105357,
      "grad_norm": 49.5,
      "learning_rate": 0.0004996446436262055,
      "loss": 39.7014,
      "step": 1940
    },
    {
      "epoch": 0.0029033768049819574,
      "grad_norm": 41.75,
      "learning_rate": 0.000499639704691198,
      "loss": 39.505,
      "step": 1960
    },
    {
      "epoch": 0.0029330030989103448,
      "grad_norm": 40.25,
      "learning_rate": 0.0004996347657561904,
      "loss": 39.3954,
      "step": 1980
    },
    {
      "epoch": 0.002962629392838732,
      "grad_norm": 34.25,
      "learning_rate": 0.0004996298268211829,
      "loss": 39.2925,
      "step": 2000
    },
    {
      "epoch": 0.0029922556867671194,
      "grad_norm": 61.25,
      "learning_rate": 0.0004996248878861753,
      "loss": 39.1232,
      "step": 2020
    },
    {
      "epoch": 0.0030218819806955067,
      "grad_norm": 44.5,
      "learning_rate": 0.0004996199489511677,
      "loss": 39.0561,
      "step": 2040
    },
    {
      "epoch": 0.003051508274623894,
      "grad_norm": 41.25,
      "learning_rate": 0.0004996150100161602,
      "loss": 38.8368,
      "step": 2060
    },
    {
      "epoch": 0.0030811345685522813,
      "grad_norm": 36.75,
      "learning_rate": 0.0004996100710811526,
      "loss": 38.8122,
      "step": 2080
    },
    {
      "epoch": 0.0031107608624806687,
      "grad_norm": 49.5,
      "learning_rate": 0.000499605132146145,
      "loss": 38.6779,
      "step": 2100
    },
    {
      "epoch": 0.003140387156409056,
      "grad_norm": 32.0,
      "learning_rate": 0.0004996001932111375,
      "loss": 38.5136,
      "step": 2120
    },
    {
      "epoch": 0.0031700134503374433,
      "grad_norm": 38.0,
      "learning_rate": 0.0004995952542761299,
      "loss": 38.3378,
      "step": 2140
    },
    {
      "epoch": 0.0031996397442658306,
      "grad_norm": 61.5,
      "learning_rate": 0.0004995903153411224,
      "loss": 38.1704,
      "step": 2160
    },
    {
      "epoch": 0.003229266038194218,
      "grad_norm": 31.875,
      "learning_rate": 0.0004995853764061148,
      "loss": 38.1518,
      "step": 2180
    },
    {
      "epoch": 0.0032588923321226057,
      "grad_norm": 41.0,
      "learning_rate": 0.0004995804374711073,
      "loss": 38.0705,
      "step": 2200
    },
    {
      "epoch": 0.003288518626050993,
      "grad_norm": 33.5,
      "learning_rate": 0.0004995754985360997,
      "loss": 37.9291,
      "step": 2220
    },
    {
      "epoch": 0.0033181449199793803,
      "grad_norm": 48.25,
      "learning_rate": 0.0004995705596010921,
      "loss": 37.7261,
      "step": 2240
    },
    {
      "epoch": 0.0033477712139077676,
      "grad_norm": 47.5,
      "learning_rate": 0.0004995656206660845,
      "loss": 37.64,
      "step": 2260
    },
    {
      "epoch": 0.003377397507836155,
      "grad_norm": 30.75,
      "learning_rate": 0.000499560681731077,
      "loss": 37.554,
      "step": 2280
    },
    {
      "epoch": 0.0034070238017645423,
      "grad_norm": 37.0,
      "learning_rate": 0.0004995557427960694,
      "loss": 37.4085,
      "step": 2300
    },
    {
      "epoch": 0.0034366500956929296,
      "grad_norm": 40.0,
      "learning_rate": 0.0004995508038610618,
      "loss": 37.3319,
      "step": 2320
    },
    {
      "epoch": 0.003466276389621317,
      "grad_norm": 26.0,
      "learning_rate": 0.0004995458649260543,
      "loss": 37.2325,
      "step": 2340
    },
    {
      "epoch": 0.003495902683549704,
      "grad_norm": 44.0,
      "learning_rate": 0.0004995409259910467,
      "loss": 37.1101,
      "step": 2360
    },
    {
      "epoch": 0.0035255289774780915,
      "grad_norm": 41.75,
      "learning_rate": 0.0004995359870560392,
      "loss": 36.9086,
      "step": 2380
    },
    {
      "epoch": 0.003555155271406479,
      "grad_norm": 38.75,
      "learning_rate": 0.0004995310481210316,
      "loss": 36.8606,
      "step": 2400
    },
    {
      "epoch": 0.003584781565334866,
      "grad_norm": 46.0,
      "learning_rate": 0.0004995261091860241,
      "loss": 36.8799,
      "step": 2420
    },
    {
      "epoch": 0.0036144078592632535,
      "grad_norm": 49.0,
      "learning_rate": 0.0004995211702510165,
      "loss": 36.7591,
      "step": 2440
    },
    {
      "epoch": 0.003644034153191641,
      "grad_norm": 28.625,
      "learning_rate": 0.0004995162313160089,
      "loss": 36.5001,
      "step": 2460
    },
    {
      "epoch": 0.003673660447120028,
      "grad_norm": 32.75,
      "learning_rate": 0.0004995112923810013,
      "loss": 36.5076,
      "step": 2480
    },
    {
      "epoch": 0.0037032867410484154,
      "grad_norm": 60.5,
      "learning_rate": 0.0004995063534459938,
      "loss": 36.4496,
      "step": 2500
    },
    {
      "epoch": 0.0037329130349768027,
      "grad_norm": 51.5,
      "learning_rate": 0.0004995014145109862,
      "loss": 36.2772,
      "step": 2520
    },
    {
      "epoch": 0.00376253932890519,
      "grad_norm": 37.75,
      "learning_rate": 0.0004994964755759787,
      "loss": 36.2552,
      "step": 2540
    },
    {
      "epoch": 0.0037921656228335774,
      "grad_norm": 30.25,
      "learning_rate": 0.000499491536640971,
      "loss": 36.1079,
      "step": 2560
    },
    {
      "epoch": 0.0038217919167619647,
      "grad_norm": 29.625,
      "learning_rate": 0.0004994865977059635,
      "loss": 36.0163,
      "step": 2580
    },
    {
      "epoch": 0.003851418210690352,
      "grad_norm": 37.5,
      "learning_rate": 0.0004994816587709559,
      "loss": 35.8935,
      "step": 2600
    },
    {
      "epoch": 0.0038810445046187393,
      "grad_norm": 41.25,
      "learning_rate": 0.0004994767198359483,
      "loss": 35.8832,
      "step": 2620
    },
    {
      "epoch": 0.003910670798547127,
      "grad_norm": 35.25,
      "learning_rate": 0.0004994717809009407,
      "loss": 35.774,
      "step": 2640
    },
    {
      "epoch": 0.003940297092475514,
      "grad_norm": 42.25,
      "learning_rate": 0.0004994668419659332,
      "loss": 35.6311,
      "step": 2660
    },
    {
      "epoch": 0.003969923386403901,
      "grad_norm": 27.25,
      "learning_rate": 0.0004994619030309256,
      "loss": 35.6101,
      "step": 2680
    },
    {
      "epoch": 0.003999549680332289,
      "grad_norm": 29.75,
      "learning_rate": 0.000499456964095918,
      "loss": 35.4244,
      "step": 2700
    },
    {
      "epoch": 0.004029175974260676,
      "grad_norm": 33.75,
      "learning_rate": 0.0004994520251609105,
      "loss": 35.4439,
      "step": 2720
    },
    {
      "epoch": 0.004058802268189063,
      "grad_norm": 34.5,
      "learning_rate": 0.0004994470862259029,
      "loss": 35.2244,
      "step": 2740
    },
    {
      "epoch": 0.0040884285621174505,
      "grad_norm": 29.75,
      "learning_rate": 0.0004994421472908954,
      "loss": 35.2606,
      "step": 2760
    },
    {
      "epoch": 0.004118054856045838,
      "grad_norm": 33.0,
      "learning_rate": 0.0004994372083558878,
      "loss": 35.1311,
      "step": 2780
    },
    {
      "epoch": 0.004147681149974225,
      "grad_norm": 49.75,
      "learning_rate": 0.0004994322694208803,
      "loss": 35.135,
      "step": 2800
    },
    {
      "epoch": 0.0041773074439026125,
      "grad_norm": 44.0,
      "learning_rate": 0.0004994273304858727,
      "loss": 34.8227,
      "step": 2820
    },
    {
      "epoch": 0.004206933737831,
      "grad_norm": 34.0,
      "learning_rate": 0.0004994223915508651,
      "loss": 34.8308,
      "step": 2840
    },
    {
      "epoch": 0.004236560031759387,
      "grad_norm": 34.0,
      "learning_rate": 0.0004994174526158575,
      "loss": 34.8619,
      "step": 2860
    },
    {
      "epoch": 0.004266186325687774,
      "grad_norm": 34.25,
      "learning_rate": 0.00049941251368085,
      "loss": 34.7495,
      "step": 2880
    },
    {
      "epoch": 0.004295812619616162,
      "grad_norm": 33.0,
      "learning_rate": 0.0004994075747458424,
      "loss": 34.665,
      "step": 2900
    },
    {
      "epoch": 0.004325438913544549,
      "grad_norm": 31.25,
      "learning_rate": 0.0004994026358108349,
      "loss": 34.6436,
      "step": 2920
    },
    {
      "epoch": 0.004355065207472936,
      "grad_norm": 44.25,
      "learning_rate": 0.0004993976968758273,
      "loss": 34.5006,
      "step": 2940
    },
    {
      "epoch": 0.004384691501401324,
      "grad_norm": 37.25,
      "learning_rate": 0.0004993927579408197,
      "loss": 34.4433,
      "step": 2960
    },
    {
      "epoch": 0.004414317795329711,
      "grad_norm": 33.0,
      "learning_rate": 0.0004993878190058122,
      "loss": 34.3807,
      "step": 2980
    },
    {
      "epoch": 0.004443944089258098,
      "grad_norm": 36.5,
      "learning_rate": 0.0004993828800708046,
      "loss": 34.2393,
      "step": 3000
    },
    {
      "epoch": 0.004473570383186486,
      "grad_norm": 58.75,
      "learning_rate": 0.0004993779411357971,
      "loss": 34.2823,
      "step": 3020
    },
    {
      "epoch": 0.004503196677114873,
      "grad_norm": 31.0,
      "learning_rate": 0.0004993730022007895,
      "loss": 34.1576,
      "step": 3040
    },
    {
      "epoch": 0.00453282297104326,
      "grad_norm": 26.625,
      "learning_rate": 0.0004993680632657819,
      "loss": 34.1282,
      "step": 3060
    },
    {
      "epoch": 0.004562449264971648,
      "grad_norm": 35.25,
      "learning_rate": 0.0004993631243307743,
      "loss": 33.9517,
      "step": 3080
    },
    {
      "epoch": 0.004592075558900035,
      "grad_norm": 30.875,
      "learning_rate": 0.0004993581853957668,
      "loss": 33.9367,
      "step": 3100
    },
    {
      "epoch": 0.004621701852828422,
      "grad_norm": 30.375,
      "learning_rate": 0.0004993532464607592,
      "loss": 33.8674,
      "step": 3120
    },
    {
      "epoch": 0.0046513281467568095,
      "grad_norm": 37.0,
      "learning_rate": 0.0004993483075257517,
      "loss": 33.7807,
      "step": 3140
    },
    {
      "epoch": 0.004680954440685197,
      "grad_norm": 50.25,
      "learning_rate": 0.000499343368590744,
      "loss": 33.749,
      "step": 3160
    },
    {
      "epoch": 0.004710580734613584,
      "grad_norm": 31.875,
      "learning_rate": 0.0004993384296557365,
      "loss": 33.6065,
      "step": 3180
    },
    {
      "epoch": 0.0047402070285419715,
      "grad_norm": 31.375,
      "learning_rate": 0.0004993334907207289,
      "loss": 33.4564,
      "step": 3200
    },
    {
      "epoch": 0.004769833322470359,
      "grad_norm": 34.0,
      "learning_rate": 0.0004993285517857213,
      "loss": 33.531,
      "step": 3220
    },
    {
      "epoch": 0.004799459616398746,
      "grad_norm": 31.25,
      "learning_rate": 0.0004993236128507137,
      "loss": 33.4034,
      "step": 3240
    },
    {
      "epoch": 0.0048290859103271334,
      "grad_norm": 31.0,
      "learning_rate": 0.0004993186739157062,
      "loss": 33.4539,
      "step": 3260
    },
    {
      "epoch": 0.004858712204255521,
      "grad_norm": 31.75,
      "learning_rate": 0.0004993137349806986,
      "loss": 33.2341,
      "step": 3280
    },
    {
      "epoch": 0.004888338498183908,
      "grad_norm": 34.5,
      "learning_rate": 0.000499308796045691,
      "loss": 33.2486,
      "step": 3300
    },
    {
      "epoch": 0.004917964792112295,
      "grad_norm": 27.375,
      "learning_rate": 0.0004993038571106835,
      "loss": 33.2143,
      "step": 3320
    },
    {
      "epoch": 0.004947591086040683,
      "grad_norm": 25.875,
      "learning_rate": 0.0004992989181756759,
      "loss": 33.0835,
      "step": 3340
    },
    {
      "epoch": 0.00497721737996907,
      "grad_norm": 31.125,
      "learning_rate": 0.0004992939792406684,
      "loss": 33.1259,
      "step": 3360
    },
    {
      "epoch": 0.005006843673897457,
      "grad_norm": 35.5,
      "learning_rate": 0.0004992890403056608,
      "loss": 32.995,
      "step": 3380
    },
    {
      "epoch": 0.005036469967825845,
      "grad_norm": 26.0,
      "learning_rate": 0.0004992841013706533,
      "loss": 32.8331,
      "step": 3400
    },
    {
      "epoch": 0.005066096261754232,
      "grad_norm": 24.875,
      "learning_rate": 0.0004992791624356457,
      "loss": 32.8863,
      "step": 3420
    },
    {
      "epoch": 0.005095722555682619,
      "grad_norm": 38.5,
      "learning_rate": 0.0004992742235006381,
      "loss": 32.7908,
      "step": 3440
    },
    {
      "epoch": 0.005125348849611007,
      "grad_norm": 35.5,
      "learning_rate": 0.0004992692845656305,
      "loss": 32.7276,
      "step": 3460
    },
    {
      "epoch": 0.005154975143539394,
      "grad_norm": 34.0,
      "learning_rate": 0.000499264345630623,
      "loss": 32.7076,
      "step": 3480
    },
    {
      "epoch": 0.005184601437467781,
      "grad_norm": 28.625,
      "learning_rate": 0.0004992594066956154,
      "loss": 32.6881,
      "step": 3500
    },
    {
      "epoch": 0.0052142277313961686,
      "grad_norm": 31.5,
      "learning_rate": 0.0004992544677606079,
      "loss": 32.5494,
      "step": 3520
    },
    {
      "epoch": 0.005243854025324556,
      "grad_norm": 28.25,
      "learning_rate": 0.0004992495288256003,
      "loss": 32.5348,
      "step": 3540
    },
    {
      "epoch": 0.005273480319252943,
      "grad_norm": 38.75,
      "learning_rate": 0.0004992445898905928,
      "loss": 32.477,
      "step": 3560
    },
    {
      "epoch": 0.0053031066131813305,
      "grad_norm": 44.25,
      "learning_rate": 0.0004992396509555852,
      "loss": 32.3766,
      "step": 3580
    },
    {
      "epoch": 0.005332732907109718,
      "grad_norm": 31.25,
      "learning_rate": 0.0004992347120205776,
      "loss": 32.3675,
      "step": 3600
    },
    {
      "epoch": 0.005362359201038105,
      "grad_norm": 32.5,
      "learning_rate": 0.0004992297730855701,
      "loss": 32.3194,
      "step": 3620
    },
    {
      "epoch": 0.0053919854949664925,
      "grad_norm": 33.0,
      "learning_rate": 0.0004992248341505625,
      "loss": 32.2312,
      "step": 3640
    },
    {
      "epoch": 0.00542161178889488,
      "grad_norm": 26.0,
      "learning_rate": 0.0004992198952155549,
      "loss": 32.2567,
      "step": 3660
    },
    {
      "epoch": 0.005451238082823267,
      "grad_norm": 30.75,
      "learning_rate": 0.0004992149562805473,
      "loss": 32.1444,
      "step": 3680
    },
    {
      "epoch": 0.005480864376751654,
      "grad_norm": 31.75,
      "learning_rate": 0.0004992100173455398,
      "loss": 32.1415,
      "step": 3700
    },
    {
      "epoch": 0.005510490670680042,
      "grad_norm": 29.25,
      "learning_rate": 0.0004992050784105322,
      "loss": 32.0721,
      "step": 3720
    },
    {
      "epoch": 0.005540116964608429,
      "grad_norm": 34.25,
      "learning_rate": 0.0004992001394755247,
      "loss": 31.9281,
      "step": 3740
    },
    {
      "epoch": 0.005569743258536816,
      "grad_norm": 30.75,
      "learning_rate": 0.000499195200540517,
      "loss": 31.8502,
      "step": 3760
    },
    {
      "epoch": 0.005599369552465204,
      "grad_norm": 37.25,
      "learning_rate": 0.0004991902616055095,
      "loss": 31.8956,
      "step": 3780
    },
    {
      "epoch": 0.005628995846393591,
      "grad_norm": 25.375,
      "learning_rate": 0.0004991853226705019,
      "loss": 31.8152,
      "step": 3800
    },
    {
      "epoch": 0.005658622140321978,
      "grad_norm": 32.5,
      "learning_rate": 0.0004991803837354943,
      "loss": 31.7686,
      "step": 3820
    },
    {
      "epoch": 0.005688248434250366,
      "grad_norm": 27.0,
      "learning_rate": 0.0004991754448004867,
      "loss": 31.7624,
      "step": 3840
    },
    {
      "epoch": 0.005717874728178753,
      "grad_norm": 31.125,
      "learning_rate": 0.0004991705058654792,
      "loss": 31.6764,
      "step": 3860
    },
    {
      "epoch": 0.00574750102210714,
      "grad_norm": 27.0,
      "learning_rate": 0.0004991655669304716,
      "loss": 31.5634,
      "step": 3880
    },
    {
      "epoch": 0.0057771273160355276,
      "grad_norm": 42.75,
      "learning_rate": 0.0004991606279954641,
      "loss": 31.531,
      "step": 3900
    },
    {
      "epoch": 0.005806753609963915,
      "grad_norm": 20.25,
      "learning_rate": 0.0004991556890604565,
      "loss": 31.5578,
      "step": 3920
    },
    {
      "epoch": 0.005836379903892302,
      "grad_norm": 28.75,
      "learning_rate": 0.000499150750125449,
      "loss": 31.3939,
      "step": 3940
    },
    {
      "epoch": 0.0058660061978206895,
      "grad_norm": 37.75,
      "learning_rate": 0.0004991458111904414,
      "loss": 31.483,
      "step": 3960
    },
    {
      "epoch": 0.005895632491749077,
      "grad_norm": 27.125,
      "learning_rate": 0.0004991408722554338,
      "loss": 31.3699,
      "step": 3980
    },
    {
      "epoch": 0.005925258785677464,
      "grad_norm": 34.0,
      "learning_rate": 0.0004991359333204263,
      "loss": 31.326,
      "step": 4000
    },
    {
      "epoch": 0.0059548850796058515,
      "grad_norm": 36.0,
      "learning_rate": 0.0004991309943854187,
      "loss": 31.2502,
      "step": 4020
    },
    {
      "epoch": 0.005984511373534239,
      "grad_norm": 28.125,
      "learning_rate": 0.0004991260554504111,
      "loss": 31.206,
      "step": 4040
    },
    {
      "epoch": 0.006014137667462626,
      "grad_norm": 27.875,
      "learning_rate": 0.0004991211165154035,
      "loss": 31.1183,
      "step": 4060
    },
    {
      "epoch": 0.006043763961391013,
      "grad_norm": 26.625,
      "learning_rate": 0.000499116177580396,
      "loss": 31.0616,
      "step": 4080
    },
    {
      "epoch": 0.006073390255319401,
      "grad_norm": 33.25,
      "learning_rate": 0.0004991112386453884,
      "loss": 31.0764,
      "step": 4100
    },
    {
      "epoch": 0.006103016549247788,
      "grad_norm": 33.25,
      "learning_rate": 0.0004991062997103809,
      "loss": 31.1013,
      "step": 4120
    },
    {
      "epoch": 0.006132642843176175,
      "grad_norm": 25.875,
      "learning_rate": 0.0004991013607753733,
      "loss": 30.9594,
      "step": 4140
    },
    {
      "epoch": 0.006162269137104563,
      "grad_norm": 35.5,
      "learning_rate": 0.0004990964218403658,
      "loss": 30.9666,
      "step": 4160
    },
    {
      "epoch": 0.00619189543103295,
      "grad_norm": 31.125,
      "learning_rate": 0.0004990914829053582,
      "loss": 30.9864,
      "step": 4180
    },
    {
      "epoch": 0.006221521724961337,
      "grad_norm": 39.75,
      "learning_rate": 0.0004990865439703506,
      "loss": 30.8923,
      "step": 4200
    },
    {
      "epoch": 0.006251148018889725,
      "grad_norm": 36.75,
      "learning_rate": 0.0004990816050353431,
      "loss": 30.8131,
      "step": 4220
    },
    {
      "epoch": 0.006280774312818112,
      "grad_norm": 33.25,
      "learning_rate": 0.0004990766661003355,
      "loss": 30.7942,
      "step": 4240
    },
    {
      "epoch": 0.006310400606746499,
      "grad_norm": 25.625,
      "learning_rate": 0.0004990717271653279,
      "loss": 30.6945,
      "step": 4260
    },
    {
      "epoch": 0.006340026900674887,
      "grad_norm": 36.5,
      "learning_rate": 0.0004990667882303204,
      "loss": 30.5995,
      "step": 4280
    },
    {
      "epoch": 0.006369653194603274,
      "grad_norm": 29.0,
      "learning_rate": 0.0004990618492953128,
      "loss": 30.6263,
      "step": 4300
    },
    {
      "epoch": 0.006399279488531661,
      "grad_norm": 25.625,
      "learning_rate": 0.0004990569103603052,
      "loss": 30.5024,
      "step": 4320
    },
    {
      "epoch": 0.0064289057824600485,
      "grad_norm": 27.375,
      "learning_rate": 0.0004990519714252977,
      "loss": 30.5216,
      "step": 4340
    },
    {
      "epoch": 0.006458532076388436,
      "grad_norm": 29.625,
      "learning_rate": 0.00049904703249029,
      "loss": 30.4789,
      "step": 4360
    },
    {
      "epoch": 0.006488158370316823,
      "grad_norm": 29.0,
      "learning_rate": 0.0004990420935552825,
      "loss": 30.3852,
      "step": 4380
    },
    {
      "epoch": 0.006517784664245211,
      "grad_norm": 36.25,
      "learning_rate": 0.0004990371546202749,
      "loss": 30.4001,
      "step": 4400
    },
    {
      "epoch": 0.006547410958173599,
      "grad_norm": 27.75,
      "learning_rate": 0.0004990322156852673,
      "loss": 30.4478,
      "step": 4420
    },
    {
      "epoch": 0.006577037252101986,
      "grad_norm": 35.25,
      "learning_rate": 0.0004990272767502597,
      "loss": 30.3411,
      "step": 4440
    },
    {
      "epoch": 0.006606663546030373,
      "grad_norm": 25.625,
      "learning_rate": 0.0004990223378152522,
      "loss": 30.2584,
      "step": 4460
    },
    {
      "epoch": 0.006636289839958761,
      "grad_norm": 27.625,
      "learning_rate": 0.0004990173988802446,
      "loss": 30.2584,
      "step": 4480
    },
    {
      "epoch": 0.006665916133887148,
      "grad_norm": 28.25,
      "learning_rate": 0.0004990124599452371,
      "loss": 30.2201,
      "step": 4500
    },
    {
      "epoch": 0.006695542427815535,
      "grad_norm": 29.0,
      "learning_rate": 0.0004990075210102295,
      "loss": 30.1349,
      "step": 4520
    },
    {
      "epoch": 0.0067251687217439226,
      "grad_norm": 21.375,
      "learning_rate": 0.000499002582075222,
      "loss": 30.1322,
      "step": 4540
    },
    {
      "epoch": 0.00675479501567231,
      "grad_norm": 34.5,
      "learning_rate": 0.0004989976431402144,
      "loss": 30.1664,
      "step": 4560
    },
    {
      "epoch": 0.006784421309600697,
      "grad_norm": 28.0,
      "learning_rate": 0.0004989927042052068,
      "loss": 30.1264,
      "step": 4580
    },
    {
      "epoch": 0.0068140476035290845,
      "grad_norm": 29.875,
      "learning_rate": 0.0004989877652701993,
      "loss": 29.9802,
      "step": 4600
    },
    {
      "epoch": 0.006843673897457472,
      "grad_norm": 31.625,
      "learning_rate": 0.0004989828263351917,
      "loss": 30.0028,
      "step": 4620
    },
    {
      "epoch": 0.006873300191385859,
      "grad_norm": 33.5,
      "learning_rate": 0.0004989778874001841,
      "loss": 29.954,
      "step": 4640
    },
    {
      "epoch": 0.0069029264853142465,
      "grad_norm": 28.125,
      "learning_rate": 0.0004989729484651765,
      "loss": 29.8774,
      "step": 4660
    },
    {
      "epoch": 0.006932552779242634,
      "grad_norm": 31.25,
      "learning_rate": 0.000498968009530169,
      "loss": 29.8508,
      "step": 4680
    },
    {
      "epoch": 0.006962179073171021,
      "grad_norm": 30.0,
      "learning_rate": 0.0004989630705951614,
      "loss": 29.8786,
      "step": 4700
    },
    {
      "epoch": 0.006991805367099408,
      "grad_norm": 26.0,
      "learning_rate": 0.0004989581316601539,
      "loss": 29.789,
      "step": 4720
    },
    {
      "epoch": 0.007021431661027796,
      "grad_norm": 29.625,
      "learning_rate": 0.0004989531927251463,
      "loss": 29.708,
      "step": 4740
    },
    {
      "epoch": 0.007051057954956183,
      "grad_norm": 32.0,
      "learning_rate": 0.0004989482537901388,
      "loss": 29.7006,
      "step": 4760
    },
    {
      "epoch": 0.00708068424888457,
      "grad_norm": 25.875,
      "learning_rate": 0.0004989433148551312,
      "loss": 29.546,
      "step": 4780
    },
    {
      "epoch": 0.007110310542812958,
      "grad_norm": 30.125,
      "learning_rate": 0.0004989383759201236,
      "loss": 29.5634,
      "step": 4800
    },
    {
      "epoch": 0.007139936836741345,
      "grad_norm": 36.5,
      "learning_rate": 0.0004989334369851161,
      "loss": 29.5994,
      "step": 4820
    },
    {
      "epoch": 0.007169563130669732,
      "grad_norm": 24.375,
      "learning_rate": 0.0004989284980501085,
      "loss": 29.5168,
      "step": 4840
    },
    {
      "epoch": 0.00719918942459812,
      "grad_norm": 32.25,
      "learning_rate": 0.0004989235591151009,
      "loss": 29.4858,
      "step": 4860
    },
    {
      "epoch": 0.007228815718526507,
      "grad_norm": 22.0,
      "learning_rate": 0.0004989186201800934,
      "loss": 29.4208,
      "step": 4880
    },
    {
      "epoch": 0.007258442012454894,
      "grad_norm": 24.625,
      "learning_rate": 0.0004989136812450858,
      "loss": 29.4053,
      "step": 4900
    },
    {
      "epoch": 0.007288068306383282,
      "grad_norm": 26.625,
      "learning_rate": 0.0004989087423100783,
      "loss": 29.4063,
      "step": 4920
    },
    {
      "epoch": 0.007317694600311669,
      "grad_norm": 27.25,
      "learning_rate": 0.0004989038033750707,
      "loss": 29.3743,
      "step": 4940
    },
    {
      "epoch": 0.007347320894240056,
      "grad_norm": 22.625,
      "learning_rate": 0.000498898864440063,
      "loss": 29.3818,
      "step": 4960
    },
    {
      "epoch": 0.0073769471881684435,
      "grad_norm": 24.5,
      "learning_rate": 0.0004988939255050556,
      "loss": 29.2662,
      "step": 4980
    },
    {
      "epoch": 0.007406573482096831,
      "grad_norm": 30.5,
      "learning_rate": 0.0004988889865700479,
      "loss": 29.3027,
      "step": 5000
    },
    {
      "epoch": 0.007436199776025218,
      "grad_norm": 24.5,
      "learning_rate": 0.0004988840476350403,
      "loss": 29.1511,
      "step": 5020
    },
    {
      "epoch": 0.0074658260699536055,
      "grad_norm": 24.25,
      "learning_rate": 0.0004988791087000327,
      "loss": 29.2143,
      "step": 5040
    },
    {
      "epoch": 0.007495452363881993,
      "grad_norm": 27.25,
      "learning_rate": 0.0004988741697650252,
      "loss": 29.1494,
      "step": 5060
    },
    {
      "epoch": 0.00752507865781038,
      "grad_norm": 21.25,
      "learning_rate": 0.0004988692308300176,
      "loss": 29.1171,
      "step": 5080
    },
    {
      "epoch": 0.007554704951738767,
      "grad_norm": 29.0,
      "learning_rate": 0.0004988642918950101,
      "loss": 29.1236,
      "step": 5100
    },
    {
      "epoch": 0.007584331245667155,
      "grad_norm": 40.0,
      "learning_rate": 0.0004988593529600025,
      "loss": 29.0323,
      "step": 5120
    },
    {
      "epoch": 0.007613957539595542,
      "grad_norm": 23.5,
      "learning_rate": 0.000498854414024995,
      "loss": 28.9919,
      "step": 5140
    },
    {
      "epoch": 0.007643583833523929,
      "grad_norm": 22.375,
      "learning_rate": 0.0004988494750899874,
      "loss": 28.9232,
      "step": 5160
    },
    {
      "epoch": 0.007673210127452317,
      "grad_norm": 35.5,
      "learning_rate": 0.0004988445361549798,
      "loss": 28.918,
      "step": 5180
    },
    {
      "epoch": 0.007702836421380704,
      "grad_norm": 26.75,
      "learning_rate": 0.0004988395972199723,
      "loss": 28.9208,
      "step": 5200
    },
    {
      "epoch": 0.007732462715309091,
      "grad_norm": 29.25,
      "learning_rate": 0.0004988346582849647,
      "loss": 28.9147,
      "step": 5220
    },
    {
      "epoch": 0.007762089009237479,
      "grad_norm": 26.375,
      "learning_rate": 0.0004988297193499571,
      "loss": 28.8134,
      "step": 5240
    },
    {
      "epoch": 0.007791715303165866,
      "grad_norm": 27.25,
      "learning_rate": 0.0004988247804149496,
      "loss": 28.8583,
      "step": 5260
    },
    {
      "epoch": 0.007821341597094253,
      "grad_norm": 24.375,
      "learning_rate": 0.000498819841479942,
      "loss": 28.7596,
      "step": 5280
    },
    {
      "epoch": 0.00785096789102264,
      "grad_norm": 27.375,
      "learning_rate": 0.0004988149025449345,
      "loss": 28.7184,
      "step": 5300
    },
    {
      "epoch": 0.007880594184951028,
      "grad_norm": 24.5,
      "learning_rate": 0.0004988099636099269,
      "loss": 28.7668,
      "step": 5320
    },
    {
      "epoch": 0.007910220478879414,
      "grad_norm": 23.125,
      "learning_rate": 0.0004988050246749193,
      "loss": 28.7422,
      "step": 5340
    },
    {
      "epoch": 0.007939846772807803,
      "grad_norm": 27.875,
      "learning_rate": 0.0004988000857399118,
      "loss": 28.7021,
      "step": 5360
    },
    {
      "epoch": 0.007969473066736189,
      "grad_norm": 25.875,
      "learning_rate": 0.0004987951468049042,
      "loss": 28.6588,
      "step": 5380
    },
    {
      "epoch": 0.007999099360664577,
      "grad_norm": 25.375,
      "learning_rate": 0.0004987902078698966,
      "loss": 28.5846,
      "step": 5400
    },
    {
      "epoch": 0.008028725654592964,
      "grad_norm": 30.125,
      "learning_rate": 0.0004987852689348891,
      "loss": 28.6113,
      "step": 5420
    },
    {
      "epoch": 0.008058351948521352,
      "grad_norm": 22.5,
      "learning_rate": 0.0004987803299998815,
      "loss": 28.5202,
      "step": 5440
    },
    {
      "epoch": 0.008087978242449738,
      "grad_norm": 31.0,
      "learning_rate": 0.0004987753910648739,
      "loss": 28.5026,
      "step": 5460
    },
    {
      "epoch": 0.008117604536378126,
      "grad_norm": 33.25,
      "learning_rate": 0.0004987704521298664,
      "loss": 28.5127,
      "step": 5480
    },
    {
      "epoch": 0.008147230830306513,
      "grad_norm": 29.0,
      "learning_rate": 0.0004987655131948588,
      "loss": 28.4538,
      "step": 5500
    },
    {
      "epoch": 0.008176857124234901,
      "grad_norm": 22.125,
      "learning_rate": 0.0004987605742598513,
      "loss": 28.457,
      "step": 5520
    },
    {
      "epoch": 0.008206483418163288,
      "grad_norm": 31.625,
      "learning_rate": 0.0004987556353248437,
      "loss": 28.4426,
      "step": 5540
    },
    {
      "epoch": 0.008236109712091676,
      "grad_norm": 23.75,
      "learning_rate": 0.000498750696389836,
      "loss": 28.3837,
      "step": 5560
    },
    {
      "epoch": 0.008265736006020062,
      "grad_norm": 23.25,
      "learning_rate": 0.0004987457574548286,
      "loss": 28.3155,
      "step": 5580
    },
    {
      "epoch": 0.00829536229994845,
      "grad_norm": 22.75,
      "learning_rate": 0.0004987408185198209,
      "loss": 28.2806,
      "step": 5600
    },
    {
      "epoch": 0.008324988593876837,
      "grad_norm": 23.875,
      "learning_rate": 0.0004987358795848133,
      "loss": 28.2933,
      "step": 5620
    },
    {
      "epoch": 0.008354614887805225,
      "grad_norm": 25.875,
      "learning_rate": 0.0004987309406498058,
      "loss": 28.2405,
      "step": 5640
    },
    {
      "epoch": 0.008384241181733611,
      "grad_norm": 29.0,
      "learning_rate": 0.0004987260017147982,
      "loss": 28.22,
      "step": 5660
    },
    {
      "epoch": 0.008413867475662,
      "grad_norm": 23.5,
      "learning_rate": 0.0004987210627797906,
      "loss": 28.1765,
      "step": 5680
    },
    {
      "epoch": 0.008443493769590386,
      "grad_norm": 23.25,
      "learning_rate": 0.0004987161238447831,
      "loss": 28.208,
      "step": 5700
    },
    {
      "epoch": 0.008473120063518774,
      "grad_norm": 27.5,
      "learning_rate": 0.0004987111849097755,
      "loss": 28.1571,
      "step": 5720
    },
    {
      "epoch": 0.00850274635744716,
      "grad_norm": 27.5,
      "learning_rate": 0.000498706245974768,
      "loss": 28.0758,
      "step": 5740
    },
    {
      "epoch": 0.008532372651375549,
      "grad_norm": 25.125,
      "learning_rate": 0.0004987013070397604,
      "loss": 28.0146,
      "step": 5760
    },
    {
      "epoch": 0.008561998945303935,
      "grad_norm": 22.75,
      "learning_rate": 0.0004986963681047528,
      "loss": 28.0971,
      "step": 5780
    },
    {
      "epoch": 0.008591625239232323,
      "grad_norm": 27.0,
      "learning_rate": 0.0004986914291697453,
      "loss": 27.9061,
      "step": 5800
    },
    {
      "epoch": 0.00862125153316071,
      "grad_norm": 28.75,
      "learning_rate": 0.0004986864902347377,
      "loss": 27.9707,
      "step": 5820
    },
    {
      "epoch": 0.008650877827089098,
      "grad_norm": 25.625,
      "learning_rate": 0.0004986815512997301,
      "loss": 27.9883,
      "step": 5840
    },
    {
      "epoch": 0.008680504121017485,
      "grad_norm": 21.375,
      "learning_rate": 0.0004986766123647226,
      "loss": 27.8901,
      "step": 5860
    },
    {
      "epoch": 0.008710130414945873,
      "grad_norm": 27.75,
      "learning_rate": 0.000498671673429715,
      "loss": 27.8871,
      "step": 5880
    },
    {
      "epoch": 0.008739756708874261,
      "grad_norm": 21.625,
      "learning_rate": 0.0004986667344947075,
      "loss": 27.9139,
      "step": 5900
    },
    {
      "epoch": 0.008769383002802647,
      "grad_norm": 25.0,
      "learning_rate": 0.0004986617955596999,
      "loss": 27.8552,
      "step": 5920
    },
    {
      "epoch": 0.008799009296731036,
      "grad_norm": 29.875,
      "learning_rate": 0.0004986568566246923,
      "loss": 27.7934,
      "step": 5940
    },
    {
      "epoch": 0.008828635590659422,
      "grad_norm": 27.875,
      "learning_rate": 0.0004986519176896848,
      "loss": 27.8324,
      "step": 5960
    },
    {
      "epoch": 0.00885826188458781,
      "grad_norm": 31.5,
      "learning_rate": 0.0004986469787546772,
      "loss": 27.7529,
      "step": 5980
    },
    {
      "epoch": 0.008887888178516197,
      "grad_norm": 20.25,
      "learning_rate": 0.0004986420398196696,
      "loss": 27.7292,
      "step": 6000
    },
    {
      "epoch": 0.008917514472444585,
      "grad_norm": 25.125,
      "learning_rate": 0.0004986371008846621,
      "loss": 27.7583,
      "step": 6020
    },
    {
      "epoch": 0.008947140766372971,
      "grad_norm": 25.25,
      "learning_rate": 0.0004986321619496545,
      "loss": 27.7517,
      "step": 6040
    },
    {
      "epoch": 0.00897676706030136,
      "grad_norm": 25.0,
      "learning_rate": 0.0004986272230146469,
      "loss": 27.6723,
      "step": 6060
    },
    {
      "epoch": 0.009006393354229746,
      "grad_norm": 27.125,
      "learning_rate": 0.0004986222840796394,
      "loss": 27.6539,
      "step": 6080
    },
    {
      "epoch": 0.009036019648158134,
      "grad_norm": 21.5,
      "learning_rate": 0.0004986173451446318,
      "loss": 27.5415,
      "step": 6100
    },
    {
      "epoch": 0.00906564594208652,
      "grad_norm": 29.125,
      "learning_rate": 0.0004986124062096243,
      "loss": 27.6001,
      "step": 6120
    },
    {
      "epoch": 0.009095272236014909,
      "grad_norm": 29.625,
      "learning_rate": 0.0004986074672746167,
      "loss": 27.531,
      "step": 6140
    },
    {
      "epoch": 0.009124898529943295,
      "grad_norm": 27.125,
      "learning_rate": 0.000498602528339609,
      "loss": 27.6403,
      "step": 6160
    },
    {
      "epoch": 0.009154524823871683,
      "grad_norm": 24.625,
      "learning_rate": 0.0004985975894046016,
      "loss": 27.5193,
      "step": 6180
    },
    {
      "epoch": 0.00918415111780007,
      "grad_norm": 26.375,
      "learning_rate": 0.0004985926504695939,
      "loss": 27.5217,
      "step": 6200
    },
    {
      "epoch": 0.009213777411728458,
      "grad_norm": 20.625,
      "learning_rate": 0.0004985877115345863,
      "loss": 27.4847,
      "step": 6220
    },
    {
      "epoch": 0.009243403705656844,
      "grad_norm": 34.75,
      "learning_rate": 0.0004985827725995788,
      "loss": 27.4528,
      "step": 6240
    },
    {
      "epoch": 0.009273029999585233,
      "grad_norm": 22.75,
      "learning_rate": 0.0004985778336645712,
      "loss": 27.4023,
      "step": 6260
    },
    {
      "epoch": 0.009302656293513619,
      "grad_norm": 26.125,
      "learning_rate": 0.0004985728947295637,
      "loss": 27.3692,
      "step": 6280
    },
    {
      "epoch": 0.009332282587442007,
      "grad_norm": 20.875,
      "learning_rate": 0.0004985679557945561,
      "loss": 27.4008,
      "step": 6300
    },
    {
      "epoch": 0.009361908881370394,
      "grad_norm": 24.625,
      "learning_rate": 0.0004985630168595486,
      "loss": 27.4807,
      "step": 6320
    },
    {
      "epoch": 0.009391535175298782,
      "grad_norm": 24.75,
      "learning_rate": 0.000498558077924541,
      "loss": 27.3572,
      "step": 6340
    },
    {
      "epoch": 0.009421161469227168,
      "grad_norm": 24.5,
      "learning_rate": 0.0004985531389895334,
      "loss": 27.3003,
      "step": 6360
    },
    {
      "epoch": 0.009450787763155557,
      "grad_norm": 23.0,
      "learning_rate": 0.0004985482000545258,
      "loss": 27.1815,
      "step": 6380
    },
    {
      "epoch": 0.009480414057083943,
      "grad_norm": 23.125,
      "learning_rate": 0.0004985432611195183,
      "loss": 27.2178,
      "step": 6400
    },
    {
      "epoch": 0.009510040351012331,
      "grad_norm": 29.25,
      "learning_rate": 0.0004985383221845107,
      "loss": 27.2077,
      "step": 6420
    },
    {
      "epoch": 0.009539666644940718,
      "grad_norm": 19.625,
      "learning_rate": 0.0004985333832495031,
      "loss": 27.2465,
      "step": 6440
    },
    {
      "epoch": 0.009569292938869106,
      "grad_norm": 25.125,
      "learning_rate": 0.0004985284443144956,
      "loss": 27.1899,
      "step": 6460
    },
    {
      "epoch": 0.009598919232797492,
      "grad_norm": 27.375,
      "learning_rate": 0.000498523505379488,
      "loss": 27.2459,
      "step": 6480
    },
    {
      "epoch": 0.00962854552672588,
      "grad_norm": 22.625,
      "learning_rate": 0.0004985185664444805,
      "loss": 27.1158,
      "step": 6500
    },
    {
      "epoch": 0.009658171820654267,
      "grad_norm": 26.125,
      "learning_rate": 0.0004985136275094729,
      "loss": 27.2161,
      "step": 6520
    },
    {
      "epoch": 0.009687798114582655,
      "grad_norm": 25.0,
      "learning_rate": 0.0004985086885744653,
      "loss": 27.0711,
      "step": 6540
    },
    {
      "epoch": 0.009717424408511042,
      "grad_norm": 20.5,
      "learning_rate": 0.0004985037496394578,
      "loss": 27.0087,
      "step": 6560
    },
    {
      "epoch": 0.00974705070243943,
      "grad_norm": 21.125,
      "learning_rate": 0.0004984988107044502,
      "loss": 27.0451,
      "step": 6580
    },
    {
      "epoch": 0.009776676996367816,
      "grad_norm": 22.0,
      "learning_rate": 0.0004984938717694426,
      "loss": 27.0665,
      "step": 6600
    },
    {
      "epoch": 0.009806303290296204,
      "grad_norm": 18.125,
      "learning_rate": 0.0004984889328344351,
      "loss": 27.0015,
      "step": 6620
    },
    {
      "epoch": 0.00983592958422459,
      "grad_norm": 27.125,
      "learning_rate": 0.0004984839938994275,
      "loss": 26.9989,
      "step": 6640
    },
    {
      "epoch": 0.009865555878152979,
      "grad_norm": 26.375,
      "learning_rate": 0.00049847905496442,
      "loss": 26.991,
      "step": 6660
    },
    {
      "epoch": 0.009895182172081365,
      "grad_norm": 21.125,
      "learning_rate": 0.0004984741160294124,
      "loss": 26.9069,
      "step": 6680
    },
    {
      "epoch": 0.009924808466009754,
      "grad_norm": 25.0,
      "learning_rate": 0.0004984691770944048,
      "loss": 26.9706,
      "step": 6700
    },
    {
      "epoch": 0.00995443475993814,
      "grad_norm": 26.0,
      "learning_rate": 0.0004984642381593973,
      "loss": 26.9443,
      "step": 6720
    },
    {
      "epoch": 0.009984061053866528,
      "grad_norm": 22.5,
      "learning_rate": 0.0004984592992243897,
      "loss": 26.9991,
      "step": 6740
    },
    {
      "epoch": 0.010013687347794915,
      "grad_norm": 25.125,
      "learning_rate": 0.000498454360289382,
      "loss": 26.8303,
      "step": 6760
    },
    {
      "epoch": 0.010043313641723303,
      "grad_norm": 21.625,
      "learning_rate": 0.0004984494213543746,
      "loss": 26.7858,
      "step": 6780
    },
    {
      "epoch": 0.01007293993565169,
      "grad_norm": 24.25,
      "learning_rate": 0.000498444482419367,
      "loss": 26.7472,
      "step": 6800
    },
    {
      "epoch": 0.010102566229580077,
      "grad_norm": 21.25,
      "learning_rate": 0.0004984395434843593,
      "loss": 26.7279,
      "step": 6820
    },
    {
      "epoch": 0.010132192523508464,
      "grad_norm": 28.125,
      "learning_rate": 0.0004984346045493518,
      "loss": 26.7243,
      "step": 6840
    },
    {
      "epoch": 0.010161818817436852,
      "grad_norm": 22.0,
      "learning_rate": 0.0004984296656143442,
      "loss": 26.7933,
      "step": 6860
    },
    {
      "epoch": 0.010191445111365239,
      "grad_norm": 21.375,
      "learning_rate": 0.0004984247266793367,
      "loss": 26.7171,
      "step": 6880
    },
    {
      "epoch": 0.010221071405293627,
      "grad_norm": 26.625,
      "learning_rate": 0.0004984197877443291,
      "loss": 26.6968,
      "step": 6900
    },
    {
      "epoch": 0.010250697699222013,
      "grad_norm": 23.25,
      "learning_rate": 0.0004984148488093216,
      "loss": 26.6967,
      "step": 6920
    },
    {
      "epoch": 0.010280323993150401,
      "grad_norm": 20.875,
      "learning_rate": 0.000498409909874314,
      "loss": 26.6935,
      "step": 6940
    },
    {
      "epoch": 0.010309950287078788,
      "grad_norm": 19.0,
      "learning_rate": 0.0004984049709393064,
      "loss": 26.6697,
      "step": 6960
    },
    {
      "epoch": 0.010339576581007176,
      "grad_norm": 21.25,
      "learning_rate": 0.0004984000320042988,
      "loss": 26.6018,
      "step": 6980
    },
    {
      "epoch": 0.010369202874935562,
      "grad_norm": 19.125,
      "learning_rate": 0.0004983950930692913,
      "loss": 26.5833,
      "step": 7000
    },
    {
      "epoch": 0.01039882916886395,
      "grad_norm": 21.0,
      "learning_rate": 0.0004983901541342837,
      "loss": 26.6342,
      "step": 7020
    },
    {
      "epoch": 0.010428455462792337,
      "grad_norm": 26.875,
      "learning_rate": 0.0004983852151992761,
      "loss": 26.6716,
      "step": 7040
    },
    {
      "epoch": 0.010458081756720725,
      "grad_norm": 25.25,
      "learning_rate": 0.0004983802762642686,
      "loss": 26.517,
      "step": 7060
    },
    {
      "epoch": 0.010487708050649112,
      "grad_norm": 23.75,
      "learning_rate": 0.000498375337329261,
      "loss": 26.5071,
      "step": 7080
    },
    {
      "epoch": 0.0105173343445775,
      "grad_norm": 28.75,
      "learning_rate": 0.0004983703983942535,
      "loss": 26.4937,
      "step": 7100
    },
    {
      "epoch": 0.010546960638505886,
      "grad_norm": 24.625,
      "learning_rate": 0.0004983654594592459,
      "loss": 26.5264,
      "step": 7120
    },
    {
      "epoch": 0.010576586932434275,
      "grad_norm": 24.75,
      "learning_rate": 0.0004983605205242383,
      "loss": 26.4541,
      "step": 7140
    },
    {
      "epoch": 0.010606213226362661,
      "grad_norm": 19.75,
      "learning_rate": 0.0004983555815892308,
      "loss": 26.4,
      "step": 7160
    },
    {
      "epoch": 0.01063583952029105,
      "grad_norm": 20.875,
      "learning_rate": 0.0004983506426542232,
      "loss": 26.3095,
      "step": 7180
    },
    {
      "epoch": 0.010665465814219436,
      "grad_norm": 23.875,
      "learning_rate": 0.0004983457037192156,
      "loss": 26.4476,
      "step": 7200
    },
    {
      "epoch": 0.010695092108147824,
      "grad_norm": 20.125,
      "learning_rate": 0.0004983407647842081,
      "loss": 26.4167,
      "step": 7220
    },
    {
      "epoch": 0.01072471840207621,
      "grad_norm": 25.125,
      "learning_rate": 0.0004983358258492005,
      "loss": 26.3873,
      "step": 7240
    },
    {
      "epoch": 0.010754344696004598,
      "grad_norm": 20.0,
      "learning_rate": 0.000498330886914193,
      "loss": 26.2724,
      "step": 7260
    },
    {
      "epoch": 0.010783970989932985,
      "grad_norm": 20.5,
      "learning_rate": 0.0004983259479791854,
      "loss": 26.3225,
      "step": 7280
    },
    {
      "epoch": 0.010813597283861373,
      "grad_norm": 31.625,
      "learning_rate": 0.0004983210090441779,
      "loss": 26.2387,
      "step": 7300
    },
    {
      "epoch": 0.01084322357778976,
      "grad_norm": 25.25,
      "learning_rate": 0.0004983160701091703,
      "loss": 26.2738,
      "step": 7320
    },
    {
      "epoch": 0.010872849871718148,
      "grad_norm": 22.125,
      "learning_rate": 0.0004983111311741627,
      "loss": 26.2851,
      "step": 7340
    },
    {
      "epoch": 0.010902476165646534,
      "grad_norm": 23.875,
      "learning_rate": 0.000498306192239155,
      "loss": 26.1881,
      "step": 7360
    },
    {
      "epoch": 0.010932102459574922,
      "grad_norm": 21.625,
      "learning_rate": 0.0004983012533041476,
      "loss": 26.1651,
      "step": 7380
    },
    {
      "epoch": 0.010961728753503309,
      "grad_norm": 21.0,
      "learning_rate": 0.00049829631436914,
      "loss": 26.2036,
      "step": 7400
    },
    {
      "epoch": 0.010991355047431697,
      "grad_norm": 20.25,
      "learning_rate": 0.0004982913754341323,
      "loss": 26.1508,
      "step": 7420
    },
    {
      "epoch": 0.011020981341360083,
      "grad_norm": 21.375,
      "learning_rate": 0.0004982864364991248,
      "loss": 26.2561,
      "step": 7440
    },
    {
      "epoch": 0.011050607635288472,
      "grad_norm": 20.75,
      "learning_rate": 0.0004982814975641172,
      "loss": 26.0361,
      "step": 7460
    },
    {
      "epoch": 0.011080233929216858,
      "grad_norm": 18.875,
      "learning_rate": 0.0004982765586291097,
      "loss": 26.0209,
      "step": 7480
    },
    {
      "epoch": 0.011109860223145246,
      "grad_norm": 21.5,
      "learning_rate": 0.0004982716196941021,
      "loss": 26.0895,
      "step": 7500
    },
    {
      "epoch": 0.011139486517073633,
      "grad_norm": 18.5,
      "learning_rate": 0.0004982666807590946,
      "loss": 26.0248,
      "step": 7520
    },
    {
      "epoch": 0.011169112811002021,
      "grad_norm": 21.125,
      "learning_rate": 0.000498261741824087,
      "loss": 25.9958,
      "step": 7540
    },
    {
      "epoch": 0.011198739104930407,
      "grad_norm": 25.875,
      "learning_rate": 0.0004982568028890794,
      "loss": 26.1197,
      "step": 7560
    },
    {
      "epoch": 0.011228365398858796,
      "grad_norm": 20.875,
      "learning_rate": 0.0004982518639540718,
      "loss": 26.0943,
      "step": 7580
    },
    {
      "epoch": 0.011257991692787182,
      "grad_norm": 21.875,
      "learning_rate": 0.0004982469250190643,
      "loss": 25.9681,
      "step": 7600
    },
    {
      "epoch": 0.01128761798671557,
      "grad_norm": 20.625,
      "learning_rate": 0.0004982419860840567,
      "loss": 25.9861,
      "step": 7620
    },
    {
      "epoch": 0.011317244280643957,
      "grad_norm": 21.75,
      "learning_rate": 0.0004982370471490492,
      "loss": 26.004,
      "step": 7640
    },
    {
      "epoch": 0.011346870574572345,
      "grad_norm": 20.0,
      "learning_rate": 0.0004982321082140416,
      "loss": 25.96,
      "step": 7660
    },
    {
      "epoch": 0.011376496868500731,
      "grad_norm": 24.25,
      "learning_rate": 0.0004982271692790341,
      "loss": 26.0032,
      "step": 7680
    },
    {
      "epoch": 0.01140612316242912,
      "grad_norm": 23.0,
      "learning_rate": 0.0004982222303440265,
      "loss": 25.9794,
      "step": 7700
    },
    {
      "epoch": 0.011435749456357506,
      "grad_norm": 26.375,
      "learning_rate": 0.0004982172914090189,
      "loss": 25.8826,
      "step": 7720
    },
    {
      "epoch": 0.011465375750285894,
      "grad_norm": 19.125,
      "learning_rate": 0.0004982123524740113,
      "loss": 25.8303,
      "step": 7740
    },
    {
      "epoch": 0.01149500204421428,
      "grad_norm": 20.75,
      "learning_rate": 0.0004982074135390038,
      "loss": 25.8461,
      "step": 7760
    },
    {
      "epoch": 0.011524628338142669,
      "grad_norm": 20.625,
      "learning_rate": 0.0004982024746039962,
      "loss": 25.8709,
      "step": 7780
    },
    {
      "epoch": 0.011554254632071055,
      "grad_norm": 21.875,
      "learning_rate": 0.0004981975356689886,
      "loss": 25.7963,
      "step": 7800
    },
    {
      "epoch": 0.011583880925999443,
      "grad_norm": 19.75,
      "learning_rate": 0.0004981925967339811,
      "loss": 25.7629,
      "step": 7820
    },
    {
      "epoch": 0.01161350721992783,
      "grad_norm": 25.875,
      "learning_rate": 0.0004981876577989735,
      "loss": 25.7592,
      "step": 7840
    },
    {
      "epoch": 0.011643133513856218,
      "grad_norm": 21.25,
      "learning_rate": 0.000498182718863966,
| "loss": 25.7715, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.011672759807784604, |
| "grad_norm": 19.375, |
| "learning_rate": 0.0004981777799289584, |
| "loss": 25.7106, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.011702386101712993, |
| "grad_norm": 24.875, |
| "learning_rate": 0.0004981728409939509, |
| "loss": 25.7222, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.011732012395641379, |
| "grad_norm": 22.25, |
| "learning_rate": 0.0004981679020589433, |
| "loss": 25.7513, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.011761638689569767, |
| "grad_norm": 20.625, |
| "learning_rate": 0.0004981629631239357, |
| "loss": 25.6742, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.011791264983498154, |
| "grad_norm": 25.375, |
| "learning_rate": 0.000498158024188928, |
| "loss": 25.719, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.011820891277426542, |
| "grad_norm": 19.625, |
| "learning_rate": 0.0004981530852539206, |
| "loss": 25.6421, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.011850517571354928, |
| "grad_norm": 22.75, |
| "learning_rate": 0.000498148146318913, |
| "loss": 25.7049, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.011880143865283316, |
| "grad_norm": 22.5, |
| "learning_rate": 0.0004981432073839053, |
| "loss": 25.6611, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.011909770159211703, |
| "grad_norm": 20.875, |
| "learning_rate": 0.0004981382684488978, |
| "loss": 25.6121, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.011939396453140091, |
| "grad_norm": 20.75, |
| "learning_rate": 0.0004981333295138902, |
| "loss": 25.6006, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.011969022747068478, |
| "grad_norm": 18.625, |
| "learning_rate": 0.0004981283905788827, |
| "loss": 25.5466, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.011998649040996866, |
| "grad_norm": 21.375, |
| "learning_rate": 0.0004981234516438751, |
| "loss": 25.5528, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.012028275334925252, |
| "grad_norm": 19.125, |
| "learning_rate": 0.0004981185127088676, |
| "loss": 25.5331, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.01205790162885364, |
| "grad_norm": 21.0, |
| "learning_rate": 0.00049811357377386, |
| "loss": 25.6046, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.012087527922782027, |
| "grad_norm": 32.0, |
| "learning_rate": 0.0004981086348388524, |
| "loss": 25.4727, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.012117154216710415, |
| "grad_norm": 19.125, |
| "learning_rate": 0.0004981036959038448, |
| "loss": 25.4483, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.012146780510638801, |
| "grad_norm": 19.75, |
| "learning_rate": 0.0004980987569688373, |
| "loss": 25.5121, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.01217640680456719, |
| "grad_norm": 20.0, |
| "learning_rate": 0.0004980938180338297, |
| "loss": 25.4124, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.012206033098495576, |
| "grad_norm": 19.625, |
| "learning_rate": 0.0004980888790988222, |
| "loss": 25.4381, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.012235659392423964, |
| "grad_norm": 19.0, |
| "learning_rate": 0.0004980839401638146, |
| "loss": 25.4493, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.01226528568635235, |
| "grad_norm": 27.5, |
| "learning_rate": 0.0004980790012288071, |
| "loss": 25.4176, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.012294911980280739, |
| "grad_norm": 22.125, |
| "learning_rate": 0.0004980740622937995, |
| "loss": 25.443, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.012324538274209125, |
| "grad_norm": 19.25, |
| "learning_rate": 0.0004980691233587919, |
| "loss": 25.4559, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.012354164568137514, |
| "grad_norm": 22.25, |
| "learning_rate": 0.0004980641844237843, |
| "loss": 25.4706, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.0123837908620659, |
| "grad_norm": 23.625, |
| "learning_rate": 0.0004980592454887768, |
| "loss": 25.3049, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.012413417155994288, |
| "grad_norm": 19.375, |
| "learning_rate": 0.0004980543065537692, |
| "loss": 25.3767, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.012443043449922675, |
| "grad_norm": 18.5, |
| "learning_rate": 0.0004980493676187616, |
| "loss": 25.2916, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.012472669743851063, |
| "grad_norm": 16.875, |
| "learning_rate": 0.0004980444286837541, |
| "loss": 25.3168, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.01250229603777945, |
| "grad_norm": 18.375, |
| "learning_rate": 0.0004980394897487465, |
| "loss": 25.3036, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.012531922331707837, |
| "grad_norm": 19.125, |
| "learning_rate": 0.000498034550813739, |
| "loss": 25.2267, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.012561548625636224, |
| "grad_norm": 21.5, |
| "learning_rate": 0.0004980296118787314, |
| "loss": 25.2108, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.012591174919564612, |
| "grad_norm": 19.875, |
| "learning_rate": 0.0004980246729437239, |
| "loss": 25.3049, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.012620801213492999, |
| "grad_norm": 18.75, |
| "learning_rate": 0.0004980197340087163, |
| "loss": 25.1796, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.012650427507421387, |
| "grad_norm": 19.0, |
| "learning_rate": 0.0004980147950737087, |
| "loss": 25.1925, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.012680053801349773, |
| "grad_norm": 19.625, |
| "learning_rate": 0.000498009856138701, |
| "loss": 25.1745, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.012709680095278161, |
| "grad_norm": 29.375, |
| "learning_rate": 0.0004980049172036936, |
| "loss": 25.1481, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.012739306389206548, |
| "grad_norm": 19.5, |
| "learning_rate": 0.000497999978268686, |
| "loss": 25.2033, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.012768932683134936, |
| "grad_norm": 23.875, |
| "learning_rate": 0.0004979950393336784, |
| "loss": 25.2193, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.012798558977063322, |
| "grad_norm": 17.125, |
| "learning_rate": 0.0004979901003986708, |
| "loss": 25.1275, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.01282818527099171, |
| "grad_norm": 18.375, |
| "learning_rate": 0.0004979851614636633, |
| "loss": 25.0801, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.012857811564920097, |
| "grad_norm": 21.625, |
| "learning_rate": 0.0004979802225286557, |
| "loss": 25.1755, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.012887437858848485, |
| "grad_norm": 19.75, |
| "learning_rate": 0.0004979752835936481, |
| "loss": 25.0867, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.012917064152776872, |
| "grad_norm": 21.875, |
| "learning_rate": 0.0004979703446586406, |
| "loss": 25.114, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.01294669044670526, |
| "grad_norm": 20.0, |
| "learning_rate": 0.000497965405723633, |
| "loss": 24.985, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.012976316740633646, |
| "grad_norm": 20.75, |
| "learning_rate": 0.0004979604667886254, |
| "loss": 25.0454, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.013005943034562035, |
| "grad_norm": 22.375, |
| "learning_rate": 0.0004979555278536178, |
| "loss": 25.0174, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.013035569328490423, |
| "grad_norm": 21.25, |
| "learning_rate": 0.0004979505889186103, |
| "loss": 25.0277, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.01306519562241881, |
| "grad_norm": 19.5, |
| "learning_rate": 0.0004979456499836027, |
| "loss": 25.0168, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.013094821916347197, |
| "grad_norm": 19.25, |
| "learning_rate": 0.0004979407110485952, |
| "loss": 24.9805, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.013124448210275584, |
| "grad_norm": 20.75, |
| "learning_rate": 0.0004979357721135876, |
| "loss": 25.0201, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.013154074504203972, |
| "grad_norm": 20.0, |
| "learning_rate": 0.0004979308331785801, |
| "loss": 24.975, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.013183700798132358, |
| "grad_norm": 20.5, |
| "learning_rate": 0.0004979258942435725, |
| "loss": 24.9923, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.013213327092060747, |
| "grad_norm": 19.0, |
| "learning_rate": 0.0004979209553085649, |
| "loss": 24.9658, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.013242953385989133, |
| "grad_norm": 19.25, |
| "learning_rate": 0.0004979160163735573, |
| "loss": 24.9277, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.013272579679917521, |
| "grad_norm": 24.0, |
| "learning_rate": 0.0004979110774385498, |
| "loss": 24.9739, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.013302205973845908, |
| "grad_norm": 28.125, |
| "learning_rate": 0.0004979061385035422, |
| "loss": 24.9075, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.013331832267774296, |
| "grad_norm": 19.75, |
| "learning_rate": 0.0004979011995685347, |
| "loss": 24.9149, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.013361458561702682, |
| "grad_norm": 24.25, |
| "learning_rate": 0.0004978962606335271, |
| "loss": 24.7799, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.01339108485563107, |
| "grad_norm": 21.25, |
| "learning_rate": 0.0004978913216985195, |
| "loss": 24.7674, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.013420711149559457, |
| "grad_norm": 18.0, |
| "learning_rate": 0.000497886382763512, |
| "loss": 24.8117, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.013450337443487845, |
| "grad_norm": 18.5, |
| "learning_rate": 0.0004978814438285044, |
| "loss": 24.8415, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.013479963737416232, |
| "grad_norm": 29.625, |
| "learning_rate": 0.0004978765048934969, |
| "loss": 24.7777, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.01350959003134462, |
| "grad_norm": 22.375, |
| "learning_rate": 0.0004978715659584893, |
| "loss": 24.7968, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.013539216325273006, |
| "grad_norm": 21.25, |
| "learning_rate": 0.0004978666270234817, |
| "loss": 24.8259, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.013568842619201394, |
| "grad_norm": 24.125, |
| "learning_rate": 0.000497861688088474, |
| "loss": 24.8418, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.01359846891312978, |
| "grad_norm": 20.0, |
| "learning_rate": 0.0004978567491534666, |
| "loss": 24.7194, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.013628095207058169, |
| "grad_norm": 15.625, |
| "learning_rate": 0.000497851810218459, |
| "loss": 24.7353, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.013657721500986555, |
| "grad_norm": 19.625, |
| "learning_rate": 0.0004978468712834515, |
| "loss": 24.6211, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.013687347794914944, |
| "grad_norm": 20.5, |
| "learning_rate": 0.0004978419323484438, |
| "loss": 24.713, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.01371697408884333, |
| "grad_norm": 20.0, |
| "learning_rate": 0.0004978369934134363, |
| "loss": 24.6394, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.013746600382771718, |
| "grad_norm": 17.375, |
| "learning_rate": 0.0004978320544784287, |
| "loss": 24.6203, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.013776226676700105, |
| "grad_norm": 20.75, |
| "learning_rate": 0.0004978271155434211, |
| "loss": 24.6364, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.013805852970628493, |
| "grad_norm": 19.125, |
| "learning_rate": 0.0004978221766084135, |
| "loss": 24.631, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.01383547926455688, |
| "grad_norm": 17.25, |
| "learning_rate": 0.000497817237673406, |
| "loss": 24.6057, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.013865105558485268, |
| "grad_norm": 26.5, |
| "learning_rate": 0.0004978122987383984, |
| "loss": 24.6704, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.013894731852413654, |
| "grad_norm": 20.875, |
| "learning_rate": 0.0004978073598033908, |
| "loss": 24.5586, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.013924358146342042, |
| "grad_norm": 19.25, |
| "learning_rate": 0.0004978024208683833, |
| "loss": 24.617, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.013953984440270429, |
| "grad_norm": 16.25, |
| "learning_rate": 0.0004977974819333757, |
| "loss": 24.6328, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.013983610734198817, |
| "grad_norm": 17.75, |
| "learning_rate": 0.0004977925429983682, |
| "loss": 24.5858, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.014013237028127203, |
| "grad_norm": 20.625, |
| "learning_rate": 0.0004977876040633606, |
| "loss": 24.5509, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.014042863322055591, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004977826651283531, |
| "loss": 24.5998, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.014072489615983978, |
| "grad_norm": 18.25, |
| "learning_rate": 0.0004977777261933455, |
| "loss": 24.4267, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.014102115909912366, |
| "grad_norm": 17.0, |
| "learning_rate": 0.0004977727872583379, |
| "loss": 24.548, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.014131742203840753, |
| "grad_norm": 17.625, |
| "learning_rate": 0.0004977678483233303, |
| "loss": 24.5241, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.01416136849776914, |
| "grad_norm": 22.5, |
| "learning_rate": 0.0004977629093883228, |
| "loss": 24.5497, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.014190994791697527, |
| "grad_norm": 20.875, |
| "learning_rate": 0.0004977579704533152, |
| "loss": 24.4922, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.014220621085625915, |
| "grad_norm": 20.125, |
| "learning_rate": 0.0004977530315183077, |
| "loss": 24.4758, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.014250247379554302, |
| "grad_norm": 21.5, |
| "learning_rate": 0.0004977480925833001, |
| "loss": 24.4183, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.01427987367348269, |
| "grad_norm": 15.8125, |
| "learning_rate": 0.0004977431536482926, |
| "loss": 24.4069, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.014309499967411076, |
| "grad_norm": 17.625, |
| "learning_rate": 0.000497738214713285, |
| "loss": 24.3877, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.014339126261339465, |
| "grad_norm": 17.5, |
| "learning_rate": 0.0004977332757782774, |
| "loss": 24.4781, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.014368752555267851, |
| "grad_norm": 16.25, |
| "learning_rate": 0.0004977283368432699, |
| "loss": 24.3834, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.01439837884919624, |
| "grad_norm": 19.0, |
| "learning_rate": 0.0004977233979082623, |
| "loss": 24.3847, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.014428005143124626, |
| "grad_norm": 17.75, |
| "learning_rate": 0.0004977184589732547, |
| "loss": 24.4303, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.014457631437053014, |
| "grad_norm": 18.125, |
| "learning_rate": 0.000497713520038247, |
| "loss": 24.3612, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.0144872577309814, |
| "grad_norm": 18.625, |
| "learning_rate": 0.0004977085811032396, |
| "loss": 24.3586, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.014516884024909789, |
| "grad_norm": 19.875, |
| "learning_rate": 0.000497703642168232, |
| "loss": 24.3231, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.014546510318838175, |
| "grad_norm": 18.0, |
| "learning_rate": 0.0004976987032332245, |
| "loss": 24.3263, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.014576136612766563, |
| "grad_norm": 23.125, |
| "learning_rate": 0.0004976937642982168, |
| "loss": 24.2935, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.01460576290669495, |
| "grad_norm": 18.25, |
| "learning_rate": 0.0004976888253632093, |
| "loss": 24.244, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.014635389200623338, |
| "grad_norm": 17.125, |
| "learning_rate": 0.0004976838864282017, |
| "loss": 24.2778, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.014665015494551724, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004976789474931941, |
| "loss": 24.3038, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.014694641788480112, |
| "grad_norm": 16.875, |
| "learning_rate": 0.0004976740085581865, |
| "loss": 24.1973, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.014724268082408499, |
| "grad_norm": 17.25, |
| "learning_rate": 0.000497669069623179, |
| "loss": 24.2236, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.014753894376336887, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004976641306881714, |
| "loss": 24.2274, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.014783520670265273, |
| "grad_norm": 20.875, |
| "learning_rate": 0.0004976591917531639, |
| "loss": 24.3033, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.014813146964193662, |
| "grad_norm": 19.5, |
| "learning_rate": 0.0004976542528181563, |
| "loss": 24.1171, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.014842773258122048, |
| "grad_norm": 19.625, |
| "learning_rate": 0.0004976493138831488, |
| "loss": 24.1736, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.014872399552050436, |
| "grad_norm": 18.625, |
| "learning_rate": 0.0004976443749481412, |
| "loss": 24.2905, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.014902025845978823, |
| "grad_norm": 18.375, |
| "learning_rate": 0.0004976394360131336, |
| "loss": 24.1324, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.014931652139907211, |
| "grad_norm": 20.375, |
| "learning_rate": 0.0004976344970781261, |
| "loss": 24.1785, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.014961278433835597, |
| "grad_norm": 17.625, |
| "learning_rate": 0.0004976295581431185, |
| "loss": 24.1872, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.014990904727763986, |
| "grad_norm": 18.0, |
| "learning_rate": 0.0004976246192081109, |
| "loss": 24.1813, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.015020531021692372, |
| "grad_norm": 21.75, |
| "learning_rate": 0.0004976196802731033, |
| "loss": 24.1438, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.01505015731562076, |
| "grad_norm": 22.375, |
| "learning_rate": 0.0004976147413380958, |
| "loss": 24.1436, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.015079783609549147, |
| "grad_norm": 19.5, |
| "learning_rate": 0.0004976098024030882, |
| "loss": 24.1394, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.015109409903477535, |
| "grad_norm": 18.25, |
| "learning_rate": 0.0004976048634680807, |
| "loss": 24.0992, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.015139036197405921, |
| "grad_norm": 15.6875, |
| "learning_rate": 0.0004975999245330731, |
| "loss": 24.0464, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.01516866249133431, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004975949855980656, |
| "loss": 24.0805, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.015198288785262696, |
| "grad_norm": 16.875, |
| "learning_rate": 0.000497590046663058, |
| "loss": 24.0985, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.015227915079191084, |
| "grad_norm": 23.25, |
| "learning_rate": 0.0004975851077280504, |
| "loss": 24.042, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.01525754137311947, |
| "grad_norm": 19.375, |
| "learning_rate": 0.0004975801687930429, |
| "loss": 24.0048, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.015287167667047859, |
| "grad_norm": 19.75, |
| "learning_rate": 0.0004975752298580353, |
| "loss": 24.0158, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.015316793960976245, |
| "grad_norm": 20.875, |
| "learning_rate": 0.0004975702909230277, |
| "loss": 23.9875, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.015346420254904633, |
| "grad_norm": 20.75, |
| "learning_rate": 0.0004975653519880202, |
| "loss": 24.055, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.01537604654883302, |
| "grad_norm": 16.125, |
| "learning_rate": 0.0004975604130530126, |
| "loss": 23.9887, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.015405672842761408, |
| "grad_norm": 17.25, |
| "learning_rate": 0.000497555474118005, |
| "loss": 24.0268, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.015435299136689794, |
| "grad_norm": 18.125, |
| "learning_rate": 0.0004975505351829975, |
| "loss": 24.0453, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.015464925430618183, |
| "grad_norm": 21.625, |
| "learning_rate": 0.0004975455962479898, |
| "loss": 24.0189, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.015494551724546569, |
| "grad_norm": 15.625, |
| "learning_rate": 0.0004975406573129823, |
| "loss": 23.9409, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.015524178018474957, |
| "grad_norm": 18.0, |
| "learning_rate": 0.0004975357183779747, |
| "loss": 23.8996, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.015553804312403344, |
| "grad_norm": 21.875, |
| "learning_rate": 0.0004975307794429671, |
| "loss": 24.0183, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.015583430606331732, |
| "grad_norm": 19.0, |
| "learning_rate": 0.0004975258405079595, |
| "loss": 23.9392, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.015613056900260118, |
| "grad_norm": 16.375, |
| "learning_rate": 0.000497520901572952, |
| "loss": 23.9131, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.015642683194188507, |
| "grad_norm": 17.5, |
| "learning_rate": 0.0004975159626379444, |
| "loss": 23.8963, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.015672309488116893, |
| "grad_norm": 16.625, |
| "learning_rate": 0.0004975110237029369, |
| "loss": 23.9361, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.01570193578204528, |
| "grad_norm": 18.25, |
| "learning_rate": 0.0004975060847679293, |
| "loss": 23.9129, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.01573156207597367, |
| "grad_norm": 19.75, |
| "learning_rate": 0.0004975011458329218, |
| "loss": 23.8795, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.015761188369902056, |
| "grad_norm": 14.8125, |
| "learning_rate": 0.0004974962068979142, |
| "loss": 23.8412, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.015790814663830442, |
| "grad_norm": 19.625, |
| "learning_rate": 0.0004974912679629066, |
| "loss": 23.8545, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.01582044095775883, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004974863290278991, |
| "loss": 23.8848, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.01585006725168722, |
| "grad_norm": 18.125, |
| "learning_rate": 0.0004974813900928915, |
| "loss": 23.7463, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.015879693545615605, |
| "grad_norm": 17.25, |
| "learning_rate": 0.0004974764511578839, |
| "loss": 23.8657, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.01590931983954399, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004974715122228763, |
| "loss": 23.7865, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.015938946133472378, |
| "grad_norm": 18.875, |
| "learning_rate": 0.0004974665732878688, |
| "loss": 23.7971, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.015968572427400768, |
| "grad_norm": 19.125, |
| "learning_rate": 0.0004974616343528612, |
| "loss": 23.8342, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.015998198721329154, |
| "grad_norm": 16.75, |
| "learning_rate": 0.0004974566954178537, |
| "loss": 23.7571, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.01602782501525754, |
| "grad_norm": 16.75, |
| "learning_rate": 0.0004974517564828461, |
| "loss": 23.8034, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.016057451309185927, |
| "grad_norm": 17.25, |
| "learning_rate": 0.0004974468175478386, |
| "loss": 23.7763, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.016087077603114317, |
| "grad_norm": 18.25, |
| "learning_rate": 0.000497441878612831, |
| "loss": 23.803, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.016116703897042704, |
| "grad_norm": 20.875, |
| "learning_rate": 0.0004974369396778234, |
| "loss": 23.7222, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.01614633019097109, |
| "grad_norm": 20.5, |
| "learning_rate": 0.0004974320007428159, |
| "loss": 23.6994, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.016175956484899476, |
| "grad_norm": 15.3125, |
| "learning_rate": 0.0004974270618078083, |
| "loss": 23.6471, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.016205582778827866, |
| "grad_norm": 15.5, |
| "learning_rate": 0.0004974221228728007, |
| "loss": 23.7271, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.016235209072756253, |
| "grad_norm": 17.5, |
| "learning_rate": 0.0004974171839377932, |
| "loss": 23.6869, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.01626483536668464, |
| "grad_norm": 16.75, |
| "learning_rate": 0.0004974122450027856, |
| "loss": 23.6976, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.016294461660613026, |
| "grad_norm": 19.0, |
| "learning_rate": 0.0004974073060677781, |
| "loss": 23.6657, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.016324087954541416, |
| "grad_norm": 18.875, |
| "learning_rate": 0.0004974023671327705, |
| "loss": 23.6059, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.016353714248469802, |
| "grad_norm": 16.875, |
| "learning_rate": 0.0004973974281977628, |
| "loss": 23.6203, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.01638334054239819, |
| "grad_norm": 26.0, |
| "learning_rate": 0.0004973924892627553, |
| "loss": 23.5207, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.016412966836326575, |
| "grad_norm": 18.25, |
| "learning_rate": 0.0004973875503277477, |
| "loss": 23.711, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.016442593130254965, |
| "grad_norm": 17.125, |
| "learning_rate": 0.0004973826113927401, |
| "loss": 23.5764, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.01647221942418335, |
| "grad_norm": 18.125, |
| "learning_rate": 0.0004973776724577325, |
| "loss": 23.6693, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.016501845718111738, |
| "grad_norm": 20.875, |
| "learning_rate": 0.000497372733522725, |
| "loss": 23.5375, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.016531472012040124, |
| "grad_norm": 14.75, |
| "learning_rate": 0.0004973677945877174, |
| "loss": 23.5473, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.016561098305968514, |
| "grad_norm": 15.625, |
| "learning_rate": 0.0004973628556527099, |
| "loss": 23.5889, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.0165907245998969, |
| "grad_norm": 16.875, |
| "learning_rate": 0.0004973579167177023, |
| "loss": 23.5879, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.016620350893825287, |
| "grad_norm": 18.375, |
| "learning_rate": 0.0004973529777826948, |
| "loss": 23.4974, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.016649977187753674, |
| "grad_norm": 15.625, |
| "learning_rate": 0.0004973480388476872, |
| "loss": 23.4771, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.016679603481682063, |
| "grad_norm": 17.5, |
| "learning_rate": 0.0004973430999126796, |
| "loss": 23.4806, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.01670922977561045, |
| "grad_norm": 19.75, |
| "learning_rate": 0.0004973381609776721, |
| "loss": 23.651, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.016738856069538836, |
| "grad_norm": 16.625, |
| "learning_rate": 0.0004973332220426645, |
| "loss": 23.5367, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.016768482363467223, |
| "grad_norm": 19.875, |
| "learning_rate": 0.0004973282831076569, |
| "loss": 23.5171, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.016798108657395613, |
| "grad_norm": 17.125, |
| "learning_rate": 0.0004973233441726494, |
| "loss": 23.4766, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.016827734951324, |
| "grad_norm": 15.3125, |
| "learning_rate": 0.0004973184052376418, |
| "loss": 23.4622, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.016857361245252386, |
| "grad_norm": 19.375, |
| "learning_rate": 0.0004973134663026343, |
| "loss": 23.5135, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.016886987539180772, |
| "grad_norm": 17.625, |
| "learning_rate": 0.0004973085273676267, |
| "loss": 23.485, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.016916613833109162, |
| "grad_norm": 17.5, |
| "learning_rate": 0.0004973035884326191, |
| "loss": 23.4218, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.01694624012703755, |
| "grad_norm": 16.375, |
| "learning_rate": 0.0004972986494976116, |
| "loss": 23.4405, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.016975866420965935, |
| "grad_norm": 21.125, |
| "learning_rate": 0.000497293710562604, |
| "loss": 23.4308, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.01700549271489432, |
| "grad_norm": 18.875, |
| "learning_rate": 0.0004972887716275964, |
| "loss": 23.4076, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.01703511900882271, |
| "grad_norm": 16.25, |
| "learning_rate": 0.0004972838326925889, |
| "loss": 23.4027, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.017064745302751098, |
| "grad_norm": 15.9375, |
| "learning_rate": 0.0004972788937575813, |
| "loss": 23.3797, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.017094371596679484, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004972739548225737, |
| "loss": 23.403, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.01712399789060787, |
| "grad_norm": 20.25, |
| "learning_rate": 0.0004972690158875662, |
| "loss": 23.435, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.01715362418453626, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004972640769525586, |
| "loss": 23.3429, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.017183250478464647, |
| "grad_norm": 19.0, |
| "learning_rate": 0.0004972591380175511, |
| "loss": 23.3458, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.017212876772393033, |
| "grad_norm": 14.3125, |
| "learning_rate": 0.0004972541990825435, |
| "loss": 23.3765, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.01724250306632142, |
| "grad_norm": 17.375, |
| "learning_rate": 0.0004972492601475358, |
| "loss": 23.3812, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.01727212936024981, |
| "grad_norm": 17.375, |
| "learning_rate": 0.0004972443212125283, |
| "loss": 23.3419, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.017301755654178196, |
| "grad_norm": 15.9375, |
| "learning_rate": 0.0004972393822775207, |
| "loss": 23.1804, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.017331381948106583, |
| "grad_norm": 15.125, |
| "learning_rate": 0.0004972344433425131, |
| "loss": 23.2947, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.01736100824203497, |
| "grad_norm": 17.0, |
| "learning_rate": 0.0004972295044075056, |
| "loss": 23.402, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.01739063453596336, |
| "grad_norm": 18.5, |
| "learning_rate": 0.000497224565472498, |
| "loss": 23.2933, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.017420260829891746, |
| "grad_norm": 17.125, |
| "learning_rate": 0.0004972196265374904, |
| "loss": 23.296, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.017449887123820132, |
| "grad_norm": 15.5625, |
| "learning_rate": 0.0004972146876024829, |
| "loss": 23.2247, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.017479513417748522, |
| "grad_norm": 15.375, |
| "learning_rate": 0.0004972097486674753, |
| "loss": 23.1945, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.01750913971167691, |
| "grad_norm": 17.25, |
| "learning_rate": 0.0004972048097324678, |
| "loss": 23.2879, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.017538766005605295, |
| "grad_norm": 16.5, |
| "learning_rate": 0.0004971998707974602, |
| "loss": 23.2503, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.01756839229953368, |
| "grad_norm": 16.875, |
| "learning_rate": 0.0004971949318624526, |
| "loss": 23.2298, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.01759801859346207, |
| "grad_norm": 16.5, |
| "learning_rate": 0.0004971899929274451, |
| "loss": 23.2478, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.017627644887390458, |
| "grad_norm": 16.875, |
| "learning_rate": 0.0004971850539924375, |
| "loss": 23.2439, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.017657271181318844, |
| "grad_norm": 16.75, |
| "learning_rate": 0.0004971801150574299, |
| "loss": 23.2426, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.01768689747524723, |
| "grad_norm": 15.5, |
| "learning_rate": 0.0004971751761224224, |
| "loss": 23.2587, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.01771652376917562, |
| "grad_norm": 15.6875, |
| "learning_rate": 0.0004971702371874148, |
| "loss": 23.2458, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.017746150063104007, |
| "grad_norm": 17.875, |
| "learning_rate": 0.0004971652982524073, |
| "loss": 23.1944, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.017775776357032393, |
| "grad_norm": 15.5, |
| "learning_rate": 0.0004971603593173997, |
| "loss": 23.2169, |
| "step": 12000 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 2025228, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.255909218322743e+18, |
| "train_batch_size": 48, |
| "trial_name": null, |
| "trial_params": null |
| } |
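
The JSON above follows the standard Hugging Face `transformers` `trainer_state.json` layout: a `log_history` array of per-logging-step records (here every 20 steps, per `logging_steps`), followed by run-level config such as `max_steps`, `save_steps`, and `train_batch_size`. A minimal sketch for inspecting the logged loss curve, assuming the file is saved locally as `trainer_state.json` (the path is an assumption, as is the choice of `matplotlib` for plotting; the `log_history`, `step`, and `loss` keys come from the file itself):

```python
# Sketch: load a trainer_state.json and plot training loss vs. step.
import json

import matplotlib.pyplot as plt

# Path is an assumption; point this at the checkpoint's trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries epoch, grad_norm, learning_rate, loss, step.
steps = [entry["step"] for entry in state["log_history"]]
losses = [entry["loss"] for entry in state["log_history"]]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("loss curve from log_history")
plt.show()
```

The same pattern reads any other logged field (for example `grad_norm` or `learning_rate`) by swapping the key in the comprehension.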