| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 3859, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0002591344908007256, | |
| "grad_norm": 111.01161666281861, | |
| "learning_rate": 5.181347150259068e-07, | |
| "loss": 12.3076, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.001295672454003628, | |
| "grad_norm": 128.99309976149883, | |
| "learning_rate": 2.5906735751295338e-06, | |
| "loss": 11.8519, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.002591344908007256, | |
| "grad_norm": 117.2928726132405, | |
| "learning_rate": 5.1813471502590676e-06, | |
| "loss": 12.3366, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0038870173620108835, | |
| "grad_norm": 89.9905121675766, | |
| "learning_rate": 7.772020725388602e-06, | |
| "loss": 11.7319, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.005182689816014512, | |
| "grad_norm": 52.319681120581, | |
| "learning_rate": 1.0362694300518135e-05, | |
| "loss": 8.7838, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0064783622700181395, | |
| "grad_norm": 38.24088557602701, | |
| "learning_rate": 1.2953367875647668e-05, | |
| "loss": 7.3055, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.007774034724021767, | |
| "grad_norm": 25.83067627606677, | |
| "learning_rate": 1.5544041450777204e-05, | |
| "loss": 5.7221, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.009069707178025395, | |
| "grad_norm": 7.762285591416621, | |
| "learning_rate": 1.813471502590674e-05, | |
| "loss": 4.3566, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.010365379632029024, | |
| "grad_norm": 4.409457180338291, | |
| "learning_rate": 2.072538860103627e-05, | |
| "loss": 3.6268, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.011661052086032651, | |
| "grad_norm": 2.601317803014797, | |
| "learning_rate": 2.3316062176165805e-05, | |
| "loss": 3.3055, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.012956724540036279, | |
| "grad_norm": 1.8400291034971308, | |
| "learning_rate": 2.5906735751295337e-05, | |
| "loss": 3.0095, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.014252396994039906, | |
| "grad_norm": 1.4104791862031891, | |
| "learning_rate": 2.8497409326424872e-05, | |
| "loss": 2.7918, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.015548069448043534, | |
| "grad_norm": 1.4652220793947772, | |
| "learning_rate": 3.108808290155441e-05, | |
| "loss": 2.7691, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01684374190204716, | |
| "grad_norm": 2.1149598221315955, | |
| "learning_rate": 3.367875647668394e-05, | |
| "loss": 2.6915, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.01813941435605079, | |
| "grad_norm": 2.508562821895131, | |
| "learning_rate": 3.626943005181348e-05, | |
| "loss": 2.5864, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.019435086810054417, | |
| "grad_norm": 5.312266133287117, | |
| "learning_rate": 3.886010362694301e-05, | |
| "loss": 2.3482, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.020730759264058048, | |
| "grad_norm": 6.635762805683308, | |
| "learning_rate": 4.145077720207254e-05, | |
| "loss": 2.0166, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.022026431718061675, | |
| "grad_norm": 1.6009933261373699, | |
| "learning_rate": 4.404145077720208e-05, | |
| "loss": 1.5391, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.023322104172065303, | |
| "grad_norm": 1.390865987104561, | |
| "learning_rate": 4.663212435233161e-05, | |
| "loss": 1.3383, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02461777662606893, | |
| "grad_norm": 1.0505820893868818, | |
| "learning_rate": 4.922279792746114e-05, | |
| "loss": 1.2646, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.025913449080072558, | |
| "grad_norm": 1.0086607725155037, | |
| "learning_rate": 5.1813471502590674e-05, | |
| "loss": 1.3202, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.027209121534076185, | |
| "grad_norm": 5.464697038037798, | |
| "learning_rate": 5.440414507772021e-05, | |
| "loss": 1.2451, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.028504793988079813, | |
| "grad_norm": 0.830545367098088, | |
| "learning_rate": 5.6994818652849744e-05, | |
| "loss": 1.2681, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.02980046644208344, | |
| "grad_norm": 0.7132598923355695, | |
| "learning_rate": 5.9585492227979276e-05, | |
| "loss": 1.2245, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.031096138896087068, | |
| "grad_norm": 0.8138882694476683, | |
| "learning_rate": 6.217616580310881e-05, | |
| "loss": 1.2305, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0323918113500907, | |
| "grad_norm": 0.8807133122197233, | |
| "learning_rate": 6.476683937823834e-05, | |
| "loss": 1.2634, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03368748380409432, | |
| "grad_norm": 0.62485091325362, | |
| "learning_rate": 6.735751295336788e-05, | |
| "loss": 1.3386, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.034983156258097954, | |
| "grad_norm": 0.7549667435061143, | |
| "learning_rate": 6.994818652849742e-05, | |
| "loss": 1.2513, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.03627882871210158, | |
| "grad_norm": 0.6420487687876172, | |
| "learning_rate": 7.253886010362695e-05, | |
| "loss": 1.2101, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03757450116610521, | |
| "grad_norm": 0.7398721962367694, | |
| "learning_rate": 7.512953367875648e-05, | |
| "loss": 1.2776, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.03887017362010883, | |
| "grad_norm": 0.6450506158667421, | |
| "learning_rate": 7.772020725388602e-05, | |
| "loss": 1.2096, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.040165846074112464, | |
| "grad_norm": 0.6506841169994266, | |
| "learning_rate": 8.031088082901554e-05, | |
| "loss": 1.228, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.041461518528116095, | |
| "grad_norm": 0.711359178146542, | |
| "learning_rate": 8.290155440414508e-05, | |
| "loss": 1.2474, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04275719098211972, | |
| "grad_norm": 1.459357949005984, | |
| "learning_rate": 8.549222797927462e-05, | |
| "loss": 1.2251, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.04405286343612335, | |
| "grad_norm": 0.8207401278130507, | |
| "learning_rate": 8.808290155440416e-05, | |
| "loss": 1.2625, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.045348535890126974, | |
| "grad_norm": 0.6271337633480082, | |
| "learning_rate": 9.067357512953368e-05, | |
| "loss": 1.2222, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.046644208344130605, | |
| "grad_norm": 0.6465301721957318, | |
| "learning_rate": 9.326424870466322e-05, | |
| "loss": 1.2357, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04793988079813423, | |
| "grad_norm": 0.5864341660869246, | |
| "learning_rate": 9.585492227979275e-05, | |
| "loss": 1.2422, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.04923555325213786, | |
| "grad_norm": 0.5644516524286168, | |
| "learning_rate": 9.844559585492228e-05, | |
| "loss": 1.1901, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.050531225706141485, | |
| "grad_norm": 0.654016063568062, | |
| "learning_rate": 0.00010103626943005182, | |
| "loss": 1.182, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.051826898160145116, | |
| "grad_norm": 0.569272168627719, | |
| "learning_rate": 0.00010362694300518135, | |
| "loss": 1.1958, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05312257061414875, | |
| "grad_norm": 0.6368680847682564, | |
| "learning_rate": 0.00010621761658031089, | |
| "loss": 1.2694, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.05441824306815237, | |
| "grad_norm": 0.5948244637123268, | |
| "learning_rate": 0.00010880829015544042, | |
| "loss": 1.1967, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.055713915522156, | |
| "grad_norm": 0.5541951524654064, | |
| "learning_rate": 0.00011139896373056995, | |
| "loss": 1.1941, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.057009587976159626, | |
| "grad_norm": 0.5701648418604164, | |
| "learning_rate": 0.00011398963730569949, | |
| "loss": 1.2443, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.05830526043016326, | |
| "grad_norm": 0.4865998338825169, | |
| "learning_rate": 0.00011658031088082901, | |
| "loss": 1.2106, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.05960093288416688, | |
| "grad_norm": 0.5131095922403084, | |
| "learning_rate": 0.00011917098445595855, | |
| "loss": 1.1278, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06089660533817051, | |
| "grad_norm": 0.6563988864261227, | |
| "learning_rate": 0.0001217616580310881, | |
| "loss": 1.1991, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.062192277792174136, | |
| "grad_norm": 0.5769507376758363, | |
| "learning_rate": 0.00012435233160621763, | |
| "loss": 1.1674, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.06348795024617776, | |
| "grad_norm": 0.5283246008008212, | |
| "learning_rate": 0.00012694300518134715, | |
| "loss": 1.2367, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.0647836227001814, | |
| "grad_norm": 0.5536285028186603, | |
| "learning_rate": 0.00012953367875647668, | |
| "loss": 1.1649, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06607929515418502, | |
| "grad_norm": 0.5163473385172975, | |
| "learning_rate": 0.00013212435233160623, | |
| "loss": 1.2867, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.06737496760818865, | |
| "grad_norm": 0.5475813868100357, | |
| "learning_rate": 0.00013471502590673575, | |
| "loss": 1.2152, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.06867064006219228, | |
| "grad_norm": 0.511381640001362, | |
| "learning_rate": 0.00013730569948186528, | |
| "loss": 1.2065, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.06996631251619591, | |
| "grad_norm": 0.4982475298525502, | |
| "learning_rate": 0.00013989637305699483, | |
| "loss": 1.2142, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.07126198497019953, | |
| "grad_norm": 0.5433832176160214, | |
| "learning_rate": 0.00014248704663212436, | |
| "loss": 1.2276, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.07255765742420316, | |
| "grad_norm": 0.47834421596861043, | |
| "learning_rate": 0.0001450777202072539, | |
| "loss": 1.1183, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.0738533298782068, | |
| "grad_norm": 2.276620321866631, | |
| "learning_rate": 0.0001476683937823834, | |
| "loss": 1.2225, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.07514900233221042, | |
| "grad_norm": 0.5622928799874418, | |
| "learning_rate": 0.00015025906735751296, | |
| "loss": 1.142, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.07644467478621404, | |
| "grad_norm": 0.48315577966120404, | |
| "learning_rate": 0.0001528497409326425, | |
| "loss": 1.1481, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.07774034724021767, | |
| "grad_norm": 0.5054965652645259, | |
| "learning_rate": 0.00015544041450777204, | |
| "loss": 1.1334, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0790360196942213, | |
| "grad_norm": 0.5412094676646951, | |
| "learning_rate": 0.00015803108808290156, | |
| "loss": 1.2215, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.08033169214822493, | |
| "grad_norm": 0.489058036506764, | |
| "learning_rate": 0.00016062176165803108, | |
| "loss": 1.1651, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.08162736460222855, | |
| "grad_norm": 0.5225993706207775, | |
| "learning_rate": 0.00016321243523316064, | |
| "loss": 1.173, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.08292303705623219, | |
| "grad_norm": 0.4941052544991184, | |
| "learning_rate": 0.00016580310880829016, | |
| "loss": 1.1537, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.08421870951023581, | |
| "grad_norm": 0.5317455067919804, | |
| "learning_rate": 0.0001683937823834197, | |
| "loss": 1.1702, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.08551438196423944, | |
| "grad_norm": 0.5382129937077951, | |
| "learning_rate": 0.00017098445595854924, | |
| "loss": 1.1954, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.08681005441824306, | |
| "grad_norm": 0.4873382544458068, | |
| "learning_rate": 0.00017357512953367876, | |
| "loss": 1.1292, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.0881057268722467, | |
| "grad_norm": 0.869634547296972, | |
| "learning_rate": 0.00017616580310880832, | |
| "loss": 1.2342, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.08940139932625032, | |
| "grad_norm": 0.47105707267021146, | |
| "learning_rate": 0.0001787564766839378, | |
| "loss": 1.1903, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.09069707178025395, | |
| "grad_norm": 0.483761714583125, | |
| "learning_rate": 0.00018134715025906737, | |
| "loss": 1.1753, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.09199274423425757, | |
| "grad_norm": 0.5094212322859152, | |
| "learning_rate": 0.00018393782383419692, | |
| "loss": 1.1736, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.09328841668826121, | |
| "grad_norm": 0.49665577393117427, | |
| "learning_rate": 0.00018652849740932644, | |
| "loss": 1.19, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.09458408914226483, | |
| "grad_norm": 0.4634306042294334, | |
| "learning_rate": 0.00018911917098445597, | |
| "loss": 1.2068, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.09587976159626846, | |
| "grad_norm": 0.4771955076541271, | |
| "learning_rate": 0.0001917098445595855, | |
| "loss": 1.1746, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.0971754340502721, | |
| "grad_norm": 0.49488098544084536, | |
| "learning_rate": 0.00019430051813471504, | |
| "loss": 1.1781, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.09847110650427572, | |
| "grad_norm": 0.45559447847276796, | |
| "learning_rate": 0.00019689119170984457, | |
| "loss": 1.207, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.09976677895827935, | |
| "grad_norm": 0.530644388105199, | |
| "learning_rate": 0.0001994818652849741, | |
| "loss": 1.2059, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.10106245141228297, | |
| "grad_norm": 0.4262101606219483, | |
| "learning_rate": 0.0001999993453944367, | |
| "loss": 1.1728, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.10235812386628661, | |
| "grad_norm": 0.5019930710713301, | |
| "learning_rate": 0.00019999668607402385, | |
| "loss": 1.1566, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.10365379632029023, | |
| "grad_norm": 0.41212142912399047, | |
| "learning_rate": 0.00019999198118027207, | |
| "loss": 1.1367, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.10494946877429386, | |
| "grad_norm": 0.4880776387603866, | |
| "learning_rate": 0.00019998523080942663, | |
| "loss": 1.1654, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.1062451412282975, | |
| "grad_norm": 0.46286032679323585, | |
| "learning_rate": 0.00019997643509957582, | |
| "loss": 1.1944, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.10754081368230112, | |
| "grad_norm": 0.5382364265983718, | |
| "learning_rate": 0.00019996559423064838, | |
| "loss": 1.1871, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.10883648613630474, | |
| "grad_norm": 0.5437136001752437, | |
| "learning_rate": 0.0001999527084244095, | |
| "loss": 1.1211, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.11013215859030837, | |
| "grad_norm": 0.4683209703143848, | |
| "learning_rate": 0.00019993777794445662, | |
| "loss": 1.2111, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.111427831044312, | |
| "grad_norm": 0.43839848429469386, | |
| "learning_rate": 0.00019992080309621371, | |
| "loss": 1.1655, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.11272350349831563, | |
| "grad_norm": 0.4352884542992481, | |
| "learning_rate": 0.00019990178422692528, | |
| "loss": 1.1674, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.11401917595231925, | |
| "grad_norm": 0.45552967972039005, | |
| "learning_rate": 0.00019988072172564918, | |
| "loss": 1.2005, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.11531484840632288, | |
| "grad_norm": 0.46569070576531457, | |
| "learning_rate": 0.0001998576160232485, | |
| "loss": 1.1611, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.11661052086032651, | |
| "grad_norm": 0.4344558621258368, | |
| "learning_rate": 0.00019983246759238305, | |
| "loss": 1.1632, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.11790619331433014, | |
| "grad_norm": 0.4524631212974695, | |
| "learning_rate": 0.00019980527694749952, | |
| "loss": 1.2003, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.11920186576833376, | |
| "grad_norm": 0.692041740038434, | |
| "learning_rate": 0.00019977604464482083, | |
| "loss": 1.233, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.1204975382223374, | |
| "grad_norm": 0.4499116414766127, | |
| "learning_rate": 0.00019974477128233505, | |
| "loss": 1.1431, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.12179321067634102, | |
| "grad_norm": 0.4755303917497374, | |
| "learning_rate": 0.00019971145749978294, | |
| "loss": 1.186, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.12308888313034465, | |
| "grad_norm": 0.4587427439073901, | |
| "learning_rate": 0.00019967610397864493, | |
| "loss": 1.1772, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.12438455558434827, | |
| "grad_norm": 0.43004396404922146, | |
| "learning_rate": 0.0001996387114421272, | |
| "loss": 1.1613, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1256802280383519, | |
| "grad_norm": 0.4457323824719563, | |
| "learning_rate": 0.0001995992806551468, | |
| "loss": 1.2043, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.12697590049235552, | |
| "grad_norm": 0.46784030959656325, | |
| "learning_rate": 0.00019955781242431622, | |
| "loss": 1.1678, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.12827157294635916, | |
| "grad_norm": 0.4610982688278206, | |
| "learning_rate": 0.00019951430759792654, | |
| "loss": 1.2353, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.1295672454003628, | |
| "grad_norm": 0.4259792086854904, | |
| "learning_rate": 0.0001994687670659305, | |
| "loss": 1.1612, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1308629178543664, | |
| "grad_norm": 0.45520840895219594, | |
| "learning_rate": 0.00019942119175992383, | |
| "loss": 1.099, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.13215859030837004, | |
| "grad_norm": 0.4473123797320602, | |
| "learning_rate": 0.00019937158265312667, | |
| "loss": 1.2141, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.13345426276237368, | |
| "grad_norm": 0.4713319110562635, | |
| "learning_rate": 0.00019931994076036324, | |
| "loss": 1.1454, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.1347499352163773, | |
| "grad_norm": 0.5136825790567691, | |
| "learning_rate": 0.00019926626713804137, | |
| "loss": 1.2187, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.13604560767038093, | |
| "grad_norm": 0.4727966110942116, | |
| "learning_rate": 0.00019921056288413076, | |
| "loss": 1.1988, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.13734128012438457, | |
| "grad_norm": 0.4364604236158943, | |
| "learning_rate": 0.00019915282913814052, | |
| "loss": 1.1685, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.13863695257838818, | |
| "grad_norm": 0.48078362848188844, | |
| "learning_rate": 0.00019909306708109585, | |
| "loss": 1.1773, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.13993262503239182, | |
| "grad_norm": 0.45186495929915804, | |
| "learning_rate": 0.00019903127793551408, | |
| "loss": 1.1409, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.14122829748639543, | |
| "grad_norm": 0.4557721517395367, | |
| "learning_rate": 0.0001989674629653793, | |
| "loss": 1.1243, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.14252396994039906, | |
| "grad_norm": 0.47938538143807347, | |
| "learning_rate": 0.00019890162347611687, | |
| "loss": 1.1644, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.1438196423944027, | |
| "grad_norm": 0.4541147395460418, | |
| "learning_rate": 0.00019883376081456636, | |
| "loss": 1.2578, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.1451153148484063, | |
| "grad_norm": 0.4780871852039313, | |
| "learning_rate": 0.00019876387636895437, | |
| "loss": 1.206, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.14641098730240995, | |
| "grad_norm": 0.42628778420968705, | |
| "learning_rate": 0.00019869197156886586, | |
| "loss": 1.1728, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.1477066597564136, | |
| "grad_norm": 0.4303201965277035, | |
| "learning_rate": 0.00019861804788521493, | |
| "loss": 1.1807, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.1490023322104172, | |
| "grad_norm": 0.48120812526739415, | |
| "learning_rate": 0.00019854210683021485, | |
| "loss": 1.1825, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.15029800466442084, | |
| "grad_norm": 0.479571534871543, | |
| "learning_rate": 0.0001984641499573472, | |
| "loss": 1.1213, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.15159367711842447, | |
| "grad_norm": 0.4517330978325847, | |
| "learning_rate": 0.0001983841788613297, | |
| "loss": 1.1799, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.15288934957242808, | |
| "grad_norm": 0.45662716170150547, | |
| "learning_rate": 0.00019830219517808404, | |
| "loss": 1.2154, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.15418502202643172, | |
| "grad_norm": 0.46419741725194397, | |
| "learning_rate": 0.00019821820058470215, | |
| "loss": 1.2244, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.15548069448043533, | |
| "grad_norm": 0.48643943236229664, | |
| "learning_rate": 0.00019813219679941203, | |
| "loss": 1.1773, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.15677636693443897, | |
| "grad_norm": 0.4213189862656036, | |
| "learning_rate": 0.00019804418558154243, | |
| "loss": 1.1918, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.1580720393884426, | |
| "grad_norm": 0.4039792149279076, | |
| "learning_rate": 0.00019795416873148703, | |
| "loss": 1.1937, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.15936771184244622, | |
| "grad_norm": 0.42408763347068035, | |
| "learning_rate": 0.00019786214809066753, | |
| "loss": 1.1792, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.16066338429644986, | |
| "grad_norm": 0.4418941392259797, | |
| "learning_rate": 0.00019776812554149603, | |
| "loss": 1.0983, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.1619590567504535, | |
| "grad_norm": 0.44778137756258257, | |
| "learning_rate": 0.00019767210300733647, | |
| "loss": 1.1441, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.1632547292044571, | |
| "grad_norm": 0.4214693702892699, | |
| "learning_rate": 0.0001975740824524653, | |
| "loss": 1.126, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.16455040165846074, | |
| "grad_norm": 0.431868681760054, | |
| "learning_rate": 0.00019747406588203128, | |
| "loss": 1.2244, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.16584607411246438, | |
| "grad_norm": 0.41521488211670027, | |
| "learning_rate": 0.00019737205534201455, | |
| "loss": 1.1443, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.167141746566468, | |
| "grad_norm": 0.4141916874944073, | |
| "learning_rate": 0.00019726805291918464, | |
| "loss": 1.2012, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.16843741902047163, | |
| "grad_norm": 0.8019720904500388, | |
| "learning_rate": 0.0001971620607410579, | |
| "loss": 1.1871, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.16973309147447524, | |
| "grad_norm": 0.44302123796014203, | |
| "learning_rate": 0.00019705408097585393, | |
| "loss": 1.217, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.17102876392847888, | |
| "grad_norm": 0.44266067140812565, | |
| "learning_rate": 0.0001969441158324512, | |
| "loss": 1.159, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.17232443638248252, | |
| "grad_norm": 0.44647716533971304, | |
| "learning_rate": 0.0001968321675603419, | |
| "loss": 1.1653, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.17362010883648613, | |
| "grad_norm": 0.44623301312377617, | |
| "learning_rate": 0.000196718238449586, | |
| "loss": 1.2078, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.17491578129048976, | |
| "grad_norm": 0.41505140877535734, | |
| "learning_rate": 0.00019660233083076416, | |
| "loss": 1.1727, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.1762114537444934, | |
| "grad_norm": 0.4603289406918021, | |
| "learning_rate": 0.00019648444707493035, | |
| "loss": 1.2151, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.177507126198497, | |
| "grad_norm": 0.4185225256437822, | |
| "learning_rate": 0.00019636458959356316, | |
| "loss": 1.1708, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.17880279865250065, | |
| "grad_norm": 0.4207300329188764, | |
| "learning_rate": 0.00019624276083851655, | |
| "loss": 1.1821, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.1800984711065043, | |
| "grad_norm": 0.44173498052158916, | |
| "learning_rate": 0.00019611896330196956, | |
| "loss": 1.175, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.1813941435605079, | |
| "grad_norm": 0.43648924778909787, | |
| "learning_rate": 0.0001959931995163756, | |
| "loss": 1.1409, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.18268981601451154, | |
| "grad_norm": 0.41607673192874983, | |
| "learning_rate": 0.00019586547205441038, | |
| "loss": 1.1903, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.18398548846851515, | |
| "grad_norm": 0.40413543802190666, | |
| "learning_rate": 0.00019573578352891937, | |
| "loss": 1.1395, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.18528116092251878, | |
| "grad_norm": 0.45031653314971887, | |
| "learning_rate": 0.00019560413659286437, | |
| "loss": 1.1551, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.18657683337652242, | |
| "grad_norm": 0.4308169448866955, | |
| "learning_rate": 0.00019547053393926934, | |
| "loss": 1.1875, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.18787250583052603, | |
| "grad_norm": 0.42706739288180534, | |
| "learning_rate": 0.000195334978301165, | |
| "loss": 1.1828, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.18916817828452967, | |
| "grad_norm": 0.43201723960587324, | |
| "learning_rate": 0.00019519747245153333, | |
| "loss": 1.1269, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.1904638507385333, | |
| "grad_norm": 0.45113312626512714, | |
| "learning_rate": 0.0001950580192032505, | |
| "loss": 1.228, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.19175952319253692, | |
| "grad_norm": 0.44161985380394386, | |
| "learning_rate": 0.0001949166214090295, | |
| "loss": 1.1242, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.19305519564654056, | |
| "grad_norm": 0.46536800518090093, | |
| "learning_rate": 0.00019477328196136178, | |
| "loss": 1.205, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.1943508681005442, | |
| "grad_norm": 0.4304870726759157, | |
| "learning_rate": 0.00019462800379245807, | |
| "loss": 1.1677, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.1956465405545478, | |
| "grad_norm": 0.4500472206954455, | |
| "learning_rate": 0.0001944807898741883, | |
| "loss": 1.2031, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.19694221300855144, | |
| "grad_norm": 0.4270521213996621, | |
| "learning_rate": 0.00019433164321802095, | |
| "loss": 1.221, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.19823788546255505, | |
| "grad_norm": 0.4500335364742716, | |
| "learning_rate": 0.00019418056687496135, | |
| "loss": 1.2206, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.1995335579165587, | |
| "grad_norm": 0.46760662103580936, | |
| "learning_rate": 0.00019402756393548936, | |
| "loss": 1.2628, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.20082923037056233, | |
| "grad_norm": 0.4374227196524263, | |
| "learning_rate": 0.00019387263752949598, | |
| "loss": 1.1895, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.20212490282456594, | |
| "grad_norm": 0.44423980739831237, | |
| "learning_rate": 0.00019371579082621952, | |
| "loss": 1.1915, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.20342057527856958, | |
| "grad_norm": 0.5067585361087695, | |
| "learning_rate": 0.00019355702703418063, | |
| "loss": 1.1396, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.20471624773257321, | |
| "grad_norm": 0.4397713592175906, | |
| "learning_rate": 0.0001933963494011168, | |
| "loss": 1.1829, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.20601192018657682, | |
| "grad_norm": 0.41142670533283804, | |
| "learning_rate": 0.0001932337612139157, | |
| "loss": 1.1403, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.20730759264058046, | |
| "grad_norm": 0.431969506585272, | |
| "learning_rate": 0.00019306926579854821, | |
| "loss": 1.1219, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2086032650945841, | |
| "grad_norm": 0.44265249198309475, | |
| "learning_rate": 0.00019290286652000018, | |
| "loss": 1.1897, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.2098989375485877, | |
| "grad_norm": 0.4277657316516301, | |
| "learning_rate": 0.0001927345667822037, | |
| "loss": 1.1746, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.21119461000259135, | |
| "grad_norm": 0.40123037504170683, | |
| "learning_rate": 0.00019256437002796744, | |
| "loss": 1.1775, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.212490282456595, | |
| "grad_norm": 0.4250319228234625, | |
| "learning_rate": 0.00019239227973890622, | |
| "loss": 1.1357, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2137859549105986, | |
| "grad_norm": 0.469142216595277, | |
| "learning_rate": 0.0001922182994353697, | |
| "loss": 1.1709, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.21508162736460223, | |
| "grad_norm": 0.4205186026441914, | |
| "learning_rate": 0.0001920424326763706, | |
| "loss": 1.1846, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.21637729981860584, | |
| "grad_norm": 0.4364780340562057, | |
| "learning_rate": 0.00019186468305951165, | |
| "loss": 1.1611, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.21767297227260948, | |
| "grad_norm": 0.42862545463693674, | |
| "learning_rate": 0.00019168505422091214, | |
| "loss": 1.2604, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.21896864472661312, | |
| "grad_norm": 0.4298577312506422, | |
| "learning_rate": 0.00019150354983513346, | |
| "loss": 1.1584, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.22026431718061673, | |
| "grad_norm": 0.4188255164345959, | |
| "learning_rate": 0.00019132017361510396, | |
| "loss": 1.1681, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.22155998963462037, | |
| "grad_norm": 0.42610008321144127, | |
| "learning_rate": 0.00019113492931204304, | |
| "loss": 1.2502, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.222855662088624, | |
| "grad_norm": 0.41917044829804134, | |
| "learning_rate": 0.00019094782071538434, | |
| "loss": 1.1441, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.22415133454262762, | |
| "grad_norm": 0.41937866972534293, | |
| "learning_rate": 0.0001907588516526983, | |
| "loss": 1.2056, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.22544700699663126, | |
| "grad_norm": 0.47221544899406714, | |
| "learning_rate": 0.00019056802598961376, | |
| "loss": 1.1754, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2267426794506349, | |
| "grad_norm": 0.45128209262811003, | |
| "learning_rate": 0.000190375347629739, | |
| "loss": 1.1963, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.2280383519046385, | |
| "grad_norm": 0.41254887942116875, | |
| "learning_rate": 0.00019018082051458176, | |
| "loss": 1.1696, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.22933402435864214, | |
| "grad_norm": 0.4336535335825639, | |
| "learning_rate": 0.00018998444862346873, | |
| "loss": 1.1942, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.23062969681264575, | |
| "grad_norm": 0.4203068249880088, | |
| "learning_rate": 0.00018978623597346408, | |
| "loss": 1.1571, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2319253692666494, | |
| "grad_norm": 0.4504420384371143, | |
| "learning_rate": 0.00018958618661928732, | |
| "loss": 1.2145, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.23322104172065303, | |
| "grad_norm": 1.2852982602777483, | |
| "learning_rate": 0.00018938430465323034, | |
| "loss": 1.1602, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.23451671417465664, | |
| "grad_norm": 0.4456499671770626, | |
| "learning_rate": 0.0001891805942050736, | |
| "loss": 1.2208, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.23581238662866028, | |
| "grad_norm": 0.4218488577208689, | |
| "learning_rate": 0.00018897505944200186, | |
| "loss": 1.1544, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.2371080590826639, | |
| "grad_norm": 0.4509293570117875, | |
| "learning_rate": 0.00018876770456851877, | |
| "loss": 1.2167, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.23840373153666752, | |
| "grad_norm": 0.4354264628463739, | |
| "learning_rate": 0.00018855853382636093, | |
| "loss": 1.2103, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.23969940399067116, | |
| "grad_norm": 0.41330759489370955, | |
| "learning_rate": 0.00018834755149441104, | |
| "loss": 1.1713, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.2409950764446748, | |
| "grad_norm": 0.43531782144147513, | |
| "learning_rate": 0.00018813476188861043, | |
| "loss": 1.1783, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2422907488986784, | |
| "grad_norm": 0.4260507531562403, | |
| "learning_rate": 0.00018792016936187086, | |
| "loss": 1.1473, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.24358642135268205, | |
| "grad_norm": 0.46161844432613697, | |
| "learning_rate": 0.00018770377830398525, | |
| "loss": 1.1719, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.24488209380668566, | |
| "grad_norm": 0.44108410378556556, | |
| "learning_rate": 0.00018748559314153818, | |
| "loss": 1.2101, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.2461777662606893, | |
| "grad_norm": 0.43144293315888466, | |
| "learning_rate": 0.00018726561833781497, | |
| "loss": 1.1933, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.24747343871469293, | |
| "grad_norm": 0.437185606075459, | |
| "learning_rate": 0.00018704385839271074, | |
| "loss": 1.1563, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.24876911116869654, | |
| "grad_norm": 0.44187547375302677, | |
| "learning_rate": 0.00018682031784263814, | |
| "loss": 1.1219, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.2500647836227002, | |
| "grad_norm": 0.4286003185934626, | |
| "learning_rate": 0.00018659500126043456, | |
| "loss": 1.1347, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.2513604560767038, | |
| "grad_norm": 0.5193197943854574, | |
| "learning_rate": 0.00018636791325526872, | |
| "loss": 1.2118, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.25265612853070746, | |
| "grad_norm": 0.4243801823353042, | |
| "learning_rate": 0.00018613905847254624, | |
| "loss": 1.2015, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.25395180098471104, | |
| "grad_norm": 0.4017619629276402, | |
| "learning_rate": 0.0001859084415938147, | |
| "loss": 1.1405, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2552474734387147, | |
| "grad_norm": 0.46780432324474447, | |
| "learning_rate": 0.00018567606733666775, | |
| "loss": 1.1803, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.2565431458927183, | |
| "grad_norm": 0.41305152773119796, | |
| "learning_rate": 0.00018544194045464886, | |
| "loss": 1.138, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.25783881834672195, | |
| "grad_norm": 0.4090708833399176, | |
| "learning_rate": 0.0001852060657371538, | |
| "loss": 1.2036, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.2591344908007256, | |
| "grad_norm": 0.43431398541973126, | |
| "learning_rate": 0.00018496844800933277, | |
| "loss": 1.1328, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.26043016325472923, | |
| "grad_norm": 0.4473287652132595, | |
| "learning_rate": 0.0001847290921319918, | |
| "loss": 1.183, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.2617258357087328, | |
| "grad_norm": 0.42139497138720455, | |
| "learning_rate": 0.00018448800300149314, | |
| "loss": 1.1885, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.26302150816273645, | |
| "grad_norm": 0.4192742651155342, | |
| "learning_rate": 0.00018424518554965516, | |
| "loss": 1.1673, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.2643171806167401, | |
| "grad_norm": 0.4276967282946324, | |
| "learning_rate": 0.00018400064474365156, | |
| "loss": 1.1442, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.2656128530707437, | |
| "grad_norm": 0.4279959695376702, | |
| "learning_rate": 0.00018375438558590967, | |
| "loss": 1.1713, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.26690852552474736, | |
| "grad_norm": 0.4138904922875282, | |
| "learning_rate": 0.00018350641311400812, | |
| "loss": 1.1294, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.26820419797875095, | |
| "grad_norm": 0.43337813322172525, | |
| "learning_rate": 0.0001832567324005737, | |
| "loss": 1.1421, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.2694998704327546, | |
| "grad_norm": 0.4418444877860619, | |
| "learning_rate": 0.00018300534855317783, | |
| "loss": 1.1775, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.2707955428867582, | |
| "grad_norm": 0.46191806443855654, | |
| "learning_rate": 0.00018275226671423195, | |
| "loss": 1.1188, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.27209121534076186, | |
| "grad_norm": 0.4449373885557454, | |
| "learning_rate": 0.0001824974920608821, | |
| "loss": 1.1445, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.2733868877947655, | |
| "grad_norm": 0.4138240087897747, | |
| "learning_rate": 0.0001822410298049035, | |
| "loss": 1.1403, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.27468256024876914, | |
| "grad_norm": 0.4410582459345502, | |
| "learning_rate": 0.00018198288519259353, | |
| "loss": 1.1835, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.2759782327027727, | |
| "grad_norm": 0.4419431606975578, | |
| "learning_rate": 0.0001817230635046645, | |
| "loss": 1.2308, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.27727390515677636, | |
| "grad_norm": 0.41612794192020086, | |
| "learning_rate": 0.0001814615700561358, | |
| "loss": 1.21, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.27856957761078, | |
| "grad_norm": 0.4728861496942093, | |
| "learning_rate": 0.00018119841019622487, | |
| "loss": 1.1687, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.27986525006478363, | |
| "grad_norm": 0.39483035476368555, | |
| "learning_rate": 0.0001809335893082381, | |
| "loss": 1.2031, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.28116092251878727, | |
| "grad_norm": 0.40167341751112157, | |
| "learning_rate": 0.0001806671128094605, | |
| "loss": 1.1495, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.28245659497279085, | |
| "grad_norm": 0.42702634032994197, | |
| "learning_rate": 0.0001803989861510449, | |
| "loss": 1.1885, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.2837522674267945, | |
| "grad_norm": 0.3970503569404702, | |
| "learning_rate": 0.00018012921481790054, | |
| "loss": 1.1608, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.28504793988079813, | |
| "grad_norm": 0.42637878811973734, | |
| "learning_rate": 0.0001798578043285807, | |
| "loss": 1.1386, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.28634361233480177, | |
| "grad_norm": 0.39626715470331225, | |
| "learning_rate": 0.00017958476023517008, | |
| "loss": 1.1355, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.2876392847888054, | |
| "grad_norm": 0.4158526138591002, | |
| "learning_rate": 0.00017931008812317089, | |
| "loss": 1.1844, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.28893495724280904, | |
| "grad_norm": 0.4110580138866959, | |
| "learning_rate": 0.00017903379361138884, | |
| "loss": 1.179, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.2902306296968126, | |
| "grad_norm": 0.39558148991812647, | |
| "learning_rate": 0.0001787558823518181, | |
| "loss": 1.1529, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.29152630215081626, | |
| "grad_norm": 0.43235047146868255, | |
| "learning_rate": 0.0001784763600295257, | |
| "loss": 1.1253, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.2928219746048199, | |
| "grad_norm": 0.4029217795192826, | |
| "learning_rate": 0.00017819523236253524, | |
| "loss": 1.1775, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 0.43737260937528, | |
| "learning_rate": 0.0001779125051017099, | |
| "loss": 1.1658, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.2954133195128272, | |
| "grad_norm": 0.4223589589109796, | |
| "learning_rate": 0.00017762818403063485, | |
| "loss": 1.2193, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.29670899196683076, | |
| "grad_norm": 0.42153072570772854, | |
| "learning_rate": 0.0001773422749654988, | |
| "loss": 1.1381, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.2980046644208344, | |
| "grad_norm": 0.44333235395249593, | |
| "learning_rate": 0.0001770547837549752, | |
| "loss": 1.2025, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.29930033687483804, | |
| "grad_norm": 0.390430213600016, | |
| "learning_rate": 0.0001767657162801025, | |
| "loss": 1.2179, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.3005960093288417, | |
| "grad_norm": 0.39596734484692325, | |
| "learning_rate": 0.00017647507845416392, | |
| "loss": 1.1443, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.3018916817828453, | |
| "grad_norm": 0.3964805626892672, | |
| "learning_rate": 0.00017618287622256625, | |
| "loss": 1.0953, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.30318735423684895, | |
| "grad_norm": 0.397165519182473, | |
| "learning_rate": 0.00017588911556271858, | |
| "loss": 1.2468, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.30448302669085253, | |
| "grad_norm": 0.40738383895968944, | |
| "learning_rate": 0.00017559380248390982, | |
| "loss": 1.2279, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.30577869914485617, | |
| "grad_norm": 0.4322556749554571, | |
| "learning_rate": 0.00017529694302718574, | |
| "loss": 1.1614, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.3070743715988598, | |
| "grad_norm": 0.3795921376766387, | |
| "learning_rate": 0.0001749985432652255, | |
| "loss": 1.1514, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.30837004405286345, | |
| "grad_norm": 0.3984980575708157, | |
| "learning_rate": 0.00017469860930221734, | |
| "loss": 1.2262, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3096657165068671, | |
| "grad_norm": 0.4239852292999169, | |
| "learning_rate": 0.00017439714727373378, | |
| "loss": 1.1279, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.31096138896087067, | |
| "grad_norm": 0.4243568518066299, | |
| "learning_rate": 0.00017409416334660606, | |
| "loss": 1.1482, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3122570614148743, | |
| "grad_norm": 0.39261780158026205, | |
| "learning_rate": 0.00017378966371879803, | |
| "loss": 1.1245, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.31355273386887794, | |
| "grad_norm": 0.4129200331115499, | |
| "learning_rate": 0.00017348365461927932, | |
| "loss": 1.164, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.3148484063228816, | |
| "grad_norm": 0.46767017383779913, | |
| "learning_rate": 0.00017317614230789792, | |
| "loss": 1.2038, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.3161440787768852, | |
| "grad_norm": 0.4636682224687034, | |
| "learning_rate": 0.00017286713307525212, | |
| "loss": 1.1798, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.31743975123088886, | |
| "grad_norm": 0.464530514696448, | |
| "learning_rate": 0.00017255663324256194, | |
| "loss": 1.1569, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.31873542368489244, | |
| "grad_norm": 0.49392901042147025, | |
| "learning_rate": 0.00017224464916153963, | |
| "loss": 1.1722, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.3200310961388961, | |
| "grad_norm": 0.437281931716071, | |
| "learning_rate": 0.00017193118721425986, | |
| "loss": 1.1472, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.3213267685928997, | |
| "grad_norm": 0.49973298936543603, | |
| "learning_rate": 0.00017161625381302914, | |
| "loss": 1.1662, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.32262244104690335, | |
| "grad_norm": 0.9030290188031315, | |
| "learning_rate": 0.00017129985540025473, | |
| "loss": 1.1751, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.323918113500907, | |
| "grad_norm": 0.46932404302432773, | |
| "learning_rate": 0.00017098199844831262, | |
| "loss": 1.1753, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3252137859549106, | |
| "grad_norm": 0.43737397519531773, | |
| "learning_rate": 0.0001706626894594154, | |
| "loss": 1.1539, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.3265094584089142, | |
| "grad_norm": 0.47678251178263875, | |
| "learning_rate": 0.00017034193496547902, | |
| "loss": 1.1572, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.32780513086291785, | |
| "grad_norm": 0.48005221081289434, | |
| "learning_rate": 0.00017001974152798942, | |
| "loss": 1.2371, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.3291008033169215, | |
| "grad_norm": 0.4005197274939468, | |
| "learning_rate": 0.000169696115737868, | |
| "loss": 1.1375, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.3303964757709251, | |
| "grad_norm": 0.520470386619454, | |
| "learning_rate": 0.00016937106421533707, | |
| "loss": 1.2201, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.33169214822492876, | |
| "grad_norm": 0.4024597397089223, | |
| "learning_rate": 0.00016904459360978427, | |
| "loss": 1.16, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.33298782067893234, | |
| "grad_norm": 0.43690159855763916, | |
| "learning_rate": 0.00016871671059962655, | |
| "loss": 1.2303, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.334283493132936, | |
| "grad_norm": 0.42841787432183803, | |
| "learning_rate": 0.00016838742189217366, | |
| "loss": 1.1601, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3355791655869396, | |
| "grad_norm": 0.41150940120312113, | |
| "learning_rate": 0.00016805673422349082, | |
| "loss": 1.1542, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.33687483804094326, | |
| "grad_norm": 0.4108846561673421, | |
| "learning_rate": 0.000167724654358261, | |
| "loss": 1.1497, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3381705104949469, | |
| "grad_norm": 0.4485077675503156, | |
| "learning_rate": 0.00016739118908964647, | |
| "loss": 1.1979, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.3394661829489505, | |
| "grad_norm": 0.40380367544471263, | |
| "learning_rate": 0.00016705634523915, | |
| "loss": 1.156, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.3407618554029541, | |
| "grad_norm": 0.4315676369718417, | |
| "learning_rate": 0.000166720129656475, | |
| "loss": 1.1876, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.34205752785695775, | |
| "grad_norm": 0.42298698967693915, | |
| "learning_rate": 0.00016638254921938587, | |
| "loss": 1.226, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.3433532003109614, | |
| "grad_norm": 0.40711175836921143, | |
| "learning_rate": 0.00016604361083356675, | |
| "loss": 1.1865, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.34464887276496503, | |
| "grad_norm": 0.4209373436004817, | |
| "learning_rate": 0.0001657033214324807, | |
| "loss": 1.1485, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.34594454521896867, | |
| "grad_norm": 0.3973628247221751, | |
| "learning_rate": 0.0001653616879772277, | |
| "loss": 1.1419, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.34724021767297225, | |
| "grad_norm": 0.40714636825697015, | |
| "learning_rate": 0.00016501871745640213, | |
| "loss": 1.1607, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.3485358901269759, | |
| "grad_norm": 0.4091289111560899, | |
| "learning_rate": 0.00016467441688595015, | |
| "loss": 1.169, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.3498315625809795, | |
| "grad_norm": 0.39294191213880614, | |
| "learning_rate": 0.0001643287933090258, | |
| "loss": 1.1856, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.35112723503498317, | |
| "grad_norm": 0.4153756492643378, | |
| "learning_rate": 0.00016398185379584707, | |
| "loss": 1.1601, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.3524229074889868, | |
| "grad_norm": 0.39772148869790713, | |
| "learning_rate": 0.0001636336054435514, | |
| "loss": 1.1402, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.3537185799429904, | |
| "grad_norm": 0.4071165220940827, | |
| "learning_rate": 0.00016328405537605032, | |
| "loss": 1.1333, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.355014252396994, | |
| "grad_norm": 0.4269465126269822, | |
| "learning_rate": 0.00016293321074388375, | |
| "loss": 1.1948, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.35630992485099766, | |
| "grad_norm": 0.41865801202656294, | |
| "learning_rate": 0.00016258107872407375, | |
| "loss": 1.1465, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.3576055973050013, | |
| "grad_norm": 0.40838786927024157, | |
| "learning_rate": 0.00016222766651997789, | |
| "loss": 1.1695, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.35890126975900494, | |
| "grad_norm": 0.4096176785342024, | |
| "learning_rate": 0.0001618729813611414, | |
| "loss": 1.1447, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.3601969422130086, | |
| "grad_norm": 0.41309116576243843, | |
| "learning_rate": 0.00016151703050314986, | |
| "loss": 1.1804, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.36149261466701216, | |
| "grad_norm": 0.4053926841703832, | |
| "learning_rate": 0.00016115982122748043, | |
| "loss": 1.1471, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.3627882871210158, | |
| "grad_norm": 0.4058497805268484, | |
| "learning_rate": 0.00016080136084135297, | |
| "loss": 1.1494, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.36408395957501943, | |
| "grad_norm": 0.4216552491807955, | |
| "learning_rate": 0.00016044165667758055, | |
| "loss": 1.1928, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.36537963202902307, | |
| "grad_norm": 0.4336798036895343, | |
| "learning_rate": 0.0001600807160944195, | |
| "loss": 1.19, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.3666753044830267, | |
| "grad_norm": 0.42380600181334993, | |
| "learning_rate": 0.00015971854647541884, | |
| "loss": 1.1674, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.3679709769370303, | |
| "grad_norm": 0.4116489897844781, | |
| "learning_rate": 0.00015935515522926927, | |
| "loss": 1.1407, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.36926664939103393, | |
| "grad_norm": 0.4051816422975703, | |
| "learning_rate": 0.00015899054978965157, | |
| "loss": 1.1861, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.37056232184503757, | |
| "grad_norm": 0.4374412539295244, | |
| "learning_rate": 0.0001586247376150846, | |
| "loss": 1.2273, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.3718579942990412, | |
| "grad_norm": 0.4225494310104097, | |
| "learning_rate": 0.00015825772618877263, | |
| "loss": 1.2218, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.37315366675304484, | |
| "grad_norm": 0.4202018672382752, | |
| "learning_rate": 0.00015788952301845237, | |
| "loss": 1.1155, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.3744493392070485, | |
| "grad_norm": 0.4313070413558118, | |
| "learning_rate": 0.0001575201356362393, | |
| "loss": 1.1551, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.37574501166105206, | |
| "grad_norm": 0.40304574068983823, | |
| "learning_rate": 0.00015714957159847367, | |
| "loss": 1.1491, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.3770406841150557, | |
| "grad_norm": 0.4413697670634288, | |
| "learning_rate": 0.00015677783848556576, | |
| "loss": 1.1631, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.37833635656905934, | |
| "grad_norm": 0.4021843049770175, | |
| "learning_rate": 0.00015640494390184112, | |
| "loss": 1.1304, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.379632029023063, | |
| "grad_norm": 0.41280223940193794, | |
| "learning_rate": 0.0001560308954753847, | |
| "loss": 1.1458, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.3809277014770666, | |
| "grad_norm": 0.44403414056325813, | |
| "learning_rate": 0.00015565570085788495, | |
| "loss": 1.2007, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.3822233739310702, | |
| "grad_norm": 0.42327265730452945, | |
| "learning_rate": 0.00015527936772447725, | |
| "loss": 1.1168, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.38351904638507384, | |
| "grad_norm": 0.41872242471581905, | |
| "learning_rate": 0.00015490190377358704, | |
| "loss": 1.1551, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.3848147188390775, | |
| "grad_norm": 0.4076213102610575, | |
| "learning_rate": 0.00015452331672677206, | |
| "loss": 1.0902, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.3861103912930811, | |
| "grad_norm": 0.39807246728274887, | |
| "learning_rate": 0.00015414361432856475, | |
| "loss": 1.1598, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.38740606374708475, | |
| "grad_norm": 0.40854063250285516, | |
| "learning_rate": 0.00015376280434631345, | |
| "loss": 1.1806, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.3887017362010884, | |
| "grad_norm": 0.40837052057355316, | |
| "learning_rate": 0.00015338089457002382, | |
| "loss": 1.0829, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.38999740865509197, | |
| "grad_norm": 0.4123609619777807, | |
| "learning_rate": 0.00015299789281219935, | |
| "loss": 1.1688, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.3912930811090956, | |
| "grad_norm": 0.4316199953541538, | |
| "learning_rate": 0.00015261380690768144, | |
| "loss": 1.1543, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.39258875356309925, | |
| "grad_norm": 0.4398633371362292, | |
| "learning_rate": 0.00015222864471348943, | |
| "loss": 1.1724, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.3938844260171029, | |
| "grad_norm": 0.4081954480961544, | |
| "learning_rate": 0.00015184241410865954, | |
| "loss": 1.1269, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.3951800984711065, | |
| "grad_norm": 0.3994337234690745, | |
| "learning_rate": 0.00015145512299408388, | |
| "loss": 1.1598, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.3964757709251101, | |
| "grad_norm": 0.4415117779921472, | |
| "learning_rate": 0.00015106677929234877, | |
| "loss": 1.0969, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.39777144337911374, | |
| "grad_norm": 0.4033267536569039, | |
| "learning_rate": 0.0001506773909475727, | |
| "loss": 1.1955, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.3990671158331174, | |
| "grad_norm": 0.39024517506575723, | |
| "learning_rate": 0.00015028696592524386, | |
| "loss": 1.1562, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.400362788287121, | |
| "grad_norm": 0.4359801226192451, | |
| "learning_rate": 0.000149895512212057, | |
| "loss": 1.1695, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.40165846074112466, | |
| "grad_norm": 0.3996337334749442, | |
| "learning_rate": 0.00014950303781575034, | |
| "loss": 1.0982, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4029541331951283, | |
| "grad_norm": 0.39816310192557475, | |
| "learning_rate": 0.00014910955076494152, | |
| "loss": 1.1223, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.4042498056491319, | |
| "grad_norm": 0.37894092829518466, | |
| "learning_rate": 0.00014871505910896352, | |
| "loss": 1.1217, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.4055454781031355, | |
| "grad_norm": 0.4319425134415384, | |
| "learning_rate": 0.0001483195709176999, | |
| "loss": 1.1966, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.40684115055713915, | |
| "grad_norm": 0.3996173231205214, | |
| "learning_rate": 0.00014792309428141978, | |
| "loss": 1.1039, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.4081368230111428, | |
| "grad_norm": 0.40718491453474887, | |
| "learning_rate": 0.0001475256373106123, | |
| "loss": 1.1725, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.40943249546514643, | |
| "grad_norm": 0.4286665586760103, | |
| "learning_rate": 0.00014712720813582066, | |
| "loss": 1.1443, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.41072816791915, | |
| "grad_norm": 0.4033525905918133, | |
| "learning_rate": 0.00014672781490747606, | |
| "loss": 1.1742, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.41202384037315365, | |
| "grad_norm": 0.40558326017198004, | |
| "learning_rate": 0.00014632746579573052, | |
| "loss": 1.2117, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4133195128271573, | |
| "grad_norm": 0.4056524890982348, | |
| "learning_rate": 0.0001459261689902902, | |
| "loss": 1.2034, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.4146151852811609, | |
| "grad_norm": 0.44100980335173223, | |
| "learning_rate": 0.00014552393270024765, | |
| "loss": 1.1544, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.41591085773516456, | |
| "grad_norm": 0.39891755340287666, | |
| "learning_rate": 0.00014512076515391375, | |
| "loss": 1.1256, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.4172065301891682, | |
| "grad_norm": 0.40682040920269774, | |
| "learning_rate": 0.00014471667459864973, | |
| "loss": 1.1499, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.4185022026431718, | |
| "grad_norm": 0.44312416626229856, | |
| "learning_rate": 0.00014431166930069816, | |
| "loss": 1.0977, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.4197978750971754, | |
| "grad_norm": 0.4301392634897964, | |
| "learning_rate": 0.00014390575754501402, | |
| "loss": 1.139, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.42109354755117906, | |
| "grad_norm": 0.418724699659618, | |
| "learning_rate": 0.0001434989476350951, | |
| "loss": 1.1592, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.4223892200051827, | |
| "grad_norm": 0.4134020107191313, | |
| "learning_rate": 0.00014309124789281226, | |
| "loss": 1.2104, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.42368489245918634, | |
| "grad_norm": 0.39580446880805387, | |
| "learning_rate": 0.00014268266665823912, | |
| "loss": 1.1757, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.42498056491319, | |
| "grad_norm": 0.41472381643526246, | |
| "learning_rate": 0.00014227321228948146, | |
| "loss": 1.1322, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.42627623736719356, | |
| "grad_norm": 0.39275693329437805, | |
| "learning_rate": 0.0001418628931625062, | |
| "loss": 1.1742, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.4275719098211972, | |
| "grad_norm": 0.38619824420216714, | |
| "learning_rate": 0.00014145171767097027, | |
| "loss": 1.1511, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.42886758227520083, | |
| "grad_norm": 0.4254507618246464, | |
| "learning_rate": 0.00014103969422604856, | |
| "loss": 1.1231, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.43016325472920447, | |
| "grad_norm": 0.40459401347151125, | |
| "learning_rate": 0.00014062683125626218, | |
| "loss": 1.1384, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.4314589271832081, | |
| "grad_norm": 0.409140169024848, | |
| "learning_rate": 0.0001402131372073058, | |
| "loss": 1.191, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.4327545996372117, | |
| "grad_norm": 0.389695812394897, | |
| "learning_rate": 0.00013979862054187505, | |
| "loss": 1.1543, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.43405027209121533, | |
| "grad_norm": 0.42112717245729503, | |
| "learning_rate": 0.00013938328973949336, | |
| "loss": 1.1715, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.43534594454521897, | |
| "grad_norm": 0.3864671779444786, | |
| "learning_rate": 0.0001389671532963384, | |
| "loss": 1.1508, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.4366416169992226, | |
| "grad_norm": 0.39052429534303434, | |
| "learning_rate": 0.00013855021972506844, | |
| "loss": 1.1054, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.43793728945322624, | |
| "grad_norm": 0.404117930960366, | |
| "learning_rate": 0.0001381324975546481, | |
| "loss": 1.1623, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.4392329619072299, | |
| "grad_norm": 0.45134279442398223, | |
| "learning_rate": 0.0001377139953301739, | |
| "loss": 1.1301, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.44052863436123346, | |
| "grad_norm": 0.42206784378450607, | |
| "learning_rate": 0.00013729472161269946, | |
| "loss": 1.18, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4418243068152371, | |
| "grad_norm": 0.43048779695832334, | |
| "learning_rate": 0.00013687468497906044, | |
| "loss": 1.1413, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.44311997926924074, | |
| "grad_norm": 0.3902243379067639, | |
| "learning_rate": 0.00013645389402169893, | |
| "loss": 1.1473, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.4444156517232444, | |
| "grad_norm": 0.3790071383777115, | |
| "learning_rate": 0.00013603235734848784, | |
| "loss": 1.2114, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.445711324177248, | |
| "grad_norm": 0.39998613746463574, | |
| "learning_rate": 0.00013561008358255468, | |
| "loss": 1.1193, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.4470069966312516, | |
| "grad_norm": 0.4136780934178077, | |
| "learning_rate": 0.0001351870813621054, | |
| "loss": 1.1417, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.44830266908525523, | |
| "grad_norm": 0.39313685685153266, | |
| "learning_rate": 0.00013476335934024735, | |
| "loss": 1.1437, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.44959834153925887, | |
| "grad_norm": 0.41028501570471904, | |
| "learning_rate": 0.00013433892618481248, | |
| "loss": 1.1032, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.4508940139932625, | |
| "grad_norm": 0.43704478597398877, | |
| "learning_rate": 0.00013391379057817995, | |
| "loss": 1.1933, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.45218968644726615, | |
| "grad_norm": 0.37937862506331654, | |
| "learning_rate": 0.00013348796121709862, | |
| "loss": 1.187, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.4534853589012698, | |
| "grad_norm": 0.40181873506259413, | |
| "learning_rate": 0.00013306144681250908, | |
| "loss": 1.1625, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.45478103135527337, | |
| "grad_norm": 0.4117326535807377, | |
| "learning_rate": 0.00013263425608936536, | |
| "loss": 1.1875, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.456076703809277, | |
| "grad_norm": 0.39234377730019654, | |
| "learning_rate": 0.00013220639778645663, | |
| "loss": 1.1888, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.45737237626328064, | |
| "grad_norm": 0.42622878050330665, | |
| "learning_rate": 0.0001317778806562283, | |
| "loss": 1.151, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.4586680487172843, | |
| "grad_norm": 0.3997439152224071, | |
| "learning_rate": 0.000131348713464603, | |
| "loss": 1.1271, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.4599637211712879, | |
| "grad_norm": 0.4151543746885238, | |
| "learning_rate": 0.0001309189049908014, | |
| "loss": 1.1745, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.4612593936252915, | |
| "grad_norm": 0.4136123970052988, | |
| "learning_rate": 0.00013048846402716237, | |
| "loss": 1.1446, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.46255506607929514, | |
| "grad_norm": 0.3925741414110596, | |
| "learning_rate": 0.0001300573993789633, | |
| "loss": 1.1186, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.4638507385332988, | |
| "grad_norm": 0.43074300043773284, | |
| "learning_rate": 0.00012962571986423993, | |
| "loss": 1.2004, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.4651464109873024, | |
| "grad_norm": 0.3948645456920277, | |
| "learning_rate": 0.00012919343431360596, | |
| "loss": 1.1534, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.46644208344130605, | |
| "grad_norm": 0.4423236206923003, | |
| "learning_rate": 0.00012876055157007242, | |
| "loss": 1.1509, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.4677377558953097, | |
| "grad_norm": 0.39371508060725335, | |
| "learning_rate": 0.00012832708048886679, | |
| "loss": 1.1941, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.4690334283493133, | |
| "grad_norm": 0.4156050706970669, | |
| "learning_rate": 0.00012789302993725175, | |
| "loss": 1.2233, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.4703291008033169, | |
| "grad_norm": 0.3844699731827056, | |
| "learning_rate": 0.0001274584087943439, | |
| "loss": 1.172, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.47162477325732055, | |
| "grad_norm": 0.392439037665497, | |
| "learning_rate": 0.00012702322595093212, | |
| "loss": 1.1935, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.4729204457113242, | |
| "grad_norm": 0.42109447627306007, | |
| "learning_rate": 0.00012658749030929566, | |
| "loss": 1.0821, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.4742161181653278, | |
| "grad_norm": 0.3985407382924968, | |
| "learning_rate": 0.00012615121078302202, | |
| "loss": 1.1564, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.4755117906193314, | |
| "grad_norm": 0.40190944372077425, | |
| "learning_rate": 0.0001257143962968246, | |
| "loss": 1.1858, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.47680746307333505, | |
| "grad_norm": 0.45076787480964914, | |
| "learning_rate": 0.00012527705578636023, | |
| "loss": 1.1514, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.4781031355273387, | |
| "grad_norm": 0.4363861711704788, | |
| "learning_rate": 0.0001248391981980462, | |
| "loss": 1.133, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.4793988079813423, | |
| "grad_norm": 0.43776230767541446, | |
| "learning_rate": 0.00012440083248887754, | |
| "loss": 1.2082, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.48069448043534596, | |
| "grad_norm": 0.4127859770012284, | |
| "learning_rate": 0.00012396196762624341, | |
| "loss": 1.1613, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.4819901528893496, | |
| "grad_norm": 0.3940447846474964, | |
| "learning_rate": 0.00012352261258774395, | |
| "loss": 1.15, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.4832858253433532, | |
| "grad_norm": 0.4193523558931992, | |
| "learning_rate": 0.0001230827763610066, | |
| "loss": 1.1382, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.4845814977973568, | |
| "grad_norm": 0.395557633792394, | |
| "learning_rate": 0.00012264246794350202, | |
| "loss": 1.1678, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.48587717025136046, | |
| "grad_norm": 0.40423444904954847, | |
| "learning_rate": 0.00012220169634236038, | |
| "loss": 1.157, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.4871728427053641, | |
| "grad_norm": 0.3870746898688869, | |
| "learning_rate": 0.00012176047057418682, | |
| "loss": 1.1439, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.48846851515936773, | |
| "grad_norm": 0.4458893388021195, | |
| "learning_rate": 0.00012131879966487709, | |
| "loss": 1.1513, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.4897641876133713, | |
| "grad_norm": 0.38478400725184214, | |
| "learning_rate": 0.00012087669264943302, | |
| "loss": 1.1333, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.49105986006737495, | |
| "grad_norm": 0.37819480972862285, | |
| "learning_rate": 0.00012043415857177751, | |
| "loss": 1.1663, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.4923555325213786, | |
| "grad_norm": 0.4125210415708117, | |
| "learning_rate": 0.00011999120648456974, | |
| "loss": 1.1457, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.49365120497538223, | |
| "grad_norm": 0.3869780886696955, | |
| "learning_rate": 0.00011954784544901971, | |
| "loss": 1.1536, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.49494687742938587, | |
| "grad_norm": 0.40907576777936727, | |
| "learning_rate": 0.00011910408453470316, | |
| "loss": 1.1361, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.4962425498833895, | |
| "grad_norm": 0.3737647497815572, | |
| "learning_rate": 0.00011865993281937589, | |
| "loss": 1.1006, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.4975382223373931, | |
| "grad_norm": 0.41125742423172207, | |
| "learning_rate": 0.00011821539938878801, | |
| "loss": 1.1641, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.4988338947913967, | |
| "grad_norm": 0.38132649216646763, | |
| "learning_rate": 0.00011777049333649826, | |
| "loss": 1.1525, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.5001295672454004, | |
| "grad_norm": 0.38834910320923277, | |
| "learning_rate": 0.00011732522376368781, | |
| "loss": 1.1531, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.501425239699404, | |
| "grad_norm": 0.40314094443067383, | |
| "learning_rate": 0.0001168795997789742, | |
| "loss": 1.1592, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.5027209121534076, | |
| "grad_norm": 0.4040865102971838, | |
| "learning_rate": 0.00011643363049822496, | |
| "loss": 1.2547, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5040165846074113, | |
| "grad_norm": 0.38562943372958325, | |
| "learning_rate": 0.00011598732504437107, | |
| "loss": 1.1109, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.5053122570614149, | |
| "grad_norm": 0.4250211446665384, | |
| "learning_rate": 0.00011554069254722051, | |
| "loss": 1.1941, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5066079295154186, | |
| "grad_norm": 0.4032090808510551, | |
| "learning_rate": 0.00011509374214327131, | |
| "loss": 1.1572, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.5079036019694221, | |
| "grad_norm": 0.40575976251729745, | |
| "learning_rate": 0.00011464648297552478, | |
| "loss": 1.1657, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5091992744234257, | |
| "grad_norm": 0.41052107415559824, | |
| "learning_rate": 0.00011419892419329844, | |
| "loss": 1.1642, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.5104949468774294, | |
| "grad_norm": 0.3958628275091308, | |
| "learning_rate": 0.00011375107495203873, | |
| "loss": 1.2116, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.511790619331433, | |
| "grad_norm": 0.38478613819779206, | |
| "learning_rate": 0.00011330294441313402, | |
| "loss": 1.1451, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.5130862917854366, | |
| "grad_norm": 0.43128674114012594, | |
| "learning_rate": 0.00011285454174372692, | |
| "loss": 1.1494, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5143819642394403, | |
| "grad_norm": 0.4424953381805022, | |
| "learning_rate": 0.0001124058761165268, | |
| "loss": 1.0932, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.5156776366934439, | |
| "grad_norm": 0.4063559419481111, | |
| "learning_rate": 0.00011195695670962234, | |
| "loss": 1.1137, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5169733091474475, | |
| "grad_norm": 0.3975356665709952, | |
| "learning_rate": 0.00011150779270629353, | |
| "loss": 1.1873, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.5182689816014512, | |
| "grad_norm": 0.4185740854748383, | |
| "learning_rate": 0.00011105839329482397, | |
| "loss": 1.1108, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5195646540554548, | |
| "grad_norm": 0.38989666093808334, | |
| "learning_rate": 0.00011060876766831285, | |
| "loss": 1.1663, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.5208603265094585, | |
| "grad_norm": 0.40596909372428913, | |
| "learning_rate": 0.00011015892502448692, | |
| "loss": 1.1382, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.522155998963462, | |
| "grad_norm": 0.37956800433559007, | |
| "learning_rate": 0.00010970887456551234, | |
| "loss": 1.104, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.5234516714174656, | |
| "grad_norm": 0.3972204538731939, | |
| "learning_rate": 0.00010925862549780637, | |
| "loss": 1.1414, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5247473438714693, | |
| "grad_norm": 0.4266101329792826, | |
| "learning_rate": 0.00010880818703184919, | |
| "loss": 1.2131, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.5260430163254729, | |
| "grad_norm": 0.4059164191077208, | |
| "learning_rate": 0.00010835756838199524, | |
| "loss": 1.1005, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5273386887794765, | |
| "grad_norm": 0.37690318544316265, | |
| "learning_rate": 0.00010790677876628501, | |
| "loss": 1.1448, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.5286343612334802, | |
| "grad_norm": 0.39147947369844754, | |
| "learning_rate": 0.00010745582740625631, | |
| "loss": 1.161, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.5299300336874838, | |
| "grad_norm": 0.4061256597825514, | |
| "learning_rate": 0.00010700472352675556, | |
| "loss": 1.1861, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.5312257061414875, | |
| "grad_norm": 0.3877742069695207, | |
| "learning_rate": 0.00010655347635574937, | |
| "loss": 1.1816, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5325213785954911, | |
| "grad_norm": 0.3971133244937986, | |
| "learning_rate": 0.00010610209512413536, | |
| "loss": 1.2017, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.5338170510494947, | |
| "grad_norm": 0.41551450058475425, | |
| "learning_rate": 0.0001056505890655537, | |
| "loss": 1.1692, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.5351127235034984, | |
| "grad_norm": 0.43646326285085413, | |
| "learning_rate": 0.00010519896741619803, | |
| "loss": 1.0993, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.5364083959575019, | |
| "grad_norm": 0.38667765038581386, | |
| "learning_rate": 0.00010474723941462658, | |
| "loss": 1.1151, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5377040684115055, | |
| "grad_norm": 0.39156927596261365, | |
| "learning_rate": 0.00010429541430157313, | |
| "loss": 1.1641, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.5389997408655092, | |
| "grad_norm": 0.39506144218425854, | |
| "learning_rate": 0.00010384350131975802, | |
| "loss": 1.2444, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5402954133195128, | |
| "grad_norm": 0.3881326675938924, | |
| "learning_rate": 0.00010339150971369906, | |
| "loss": 1.129, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.5415910857735164, | |
| "grad_norm": 0.3901794452377504, | |
| "learning_rate": 0.00010293944872952248, | |
| "loss": 1.1536, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.5428867582275201, | |
| "grad_norm": 0.3852428804649106, | |
| "learning_rate": 0.00010248732761477371, | |
| "loss": 1.1365, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.5441824306815237, | |
| "grad_norm": 0.4261491575765494, | |
| "learning_rate": 0.0001020351556182282, | |
| "loss": 1.089, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5454781031355274, | |
| "grad_norm": 0.4070655687819442, | |
| "learning_rate": 0.00010158294198970232, | |
| "loss": 1.1591, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.546773775589531, | |
| "grad_norm": 0.4142505000419484, | |
| "learning_rate": 0.00010113069597986402, | |
| "loss": 1.1403, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.5480694480435346, | |
| "grad_norm": 0.3988427337375973, | |
| "learning_rate": 0.0001006784268400437, | |
| "loss": 1.1043, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.5493651204975383, | |
| "grad_norm": 0.37926295342271793, | |
| "learning_rate": 0.00010022614382204492, | |
| "loss": 1.1694, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.5506607929515418, | |
| "grad_norm": 0.3854042883512697, | |
| "learning_rate": 9.97738561779551e-05, | |
| "loss": 1.1891, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.5519564654055454, | |
| "grad_norm": 0.4035084032954952, | |
| "learning_rate": 9.932157315995631e-05, | |
| "loss": 1.1563, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.5532521378595491, | |
| "grad_norm": 0.4083098281929601, | |
| "learning_rate": 9.8869304020136e-05, | |
| "loss": 1.2097, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.5545478103135527, | |
| "grad_norm": 0.3884516778316842, | |
| "learning_rate": 9.841705801029769e-05, | |
| "loss": 1.1932, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.5558434827675564, | |
| "grad_norm": 0.38224759440627476, | |
| "learning_rate": 9.79648443817718e-05, | |
| "loss": 1.0978, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.55713915522156, | |
| "grad_norm": 0.46057864945040095, | |
| "learning_rate": 9.751267238522631e-05, | |
| "loss": 1.1465, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.5584348276755636, | |
| "grad_norm": 0.38627135619303393, | |
| "learning_rate": 9.706055127047755e-05, | |
| "loss": 1.1533, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.5597305001295673, | |
| "grad_norm": 0.3891179649712941, | |
| "learning_rate": 9.660849028630096e-05, | |
| "loss": 1.1984, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.5610261725835709, | |
| "grad_norm": 0.39223924604212307, | |
| "learning_rate": 9.615649868024199e-05, | |
| "loss": 1.1463, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.5623218450375745, | |
| "grad_norm": 0.39471896059848777, | |
| "learning_rate": 9.570458569842688e-05, | |
| "loss": 1.1487, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.5636175174915782, | |
| "grad_norm": 0.3928874290731547, | |
| "learning_rate": 9.525276058537344e-05, | |
| "loss": 1.1315, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.5649131899455817, | |
| "grad_norm": 0.4064990206731607, | |
| "learning_rate": 9.480103258380198e-05, | |
| "loss": 1.1562, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.5662088623995853, | |
| "grad_norm": 0.38415170548201955, | |
| "learning_rate": 9.434941093444632e-05, | |
| "loss": 1.1811, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.567504534853589, | |
| "grad_norm": 0.3859240132285136, | |
| "learning_rate": 9.389790487586465e-05, | |
| "loss": 1.2216, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.5688002073075926, | |
| "grad_norm": 0.40615716009377495, | |
| "learning_rate": 9.344652364425065e-05, | |
| "loss": 1.2206, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.5700958797615963, | |
| "grad_norm": 0.3919524007695092, | |
| "learning_rate": 9.299527647324444e-05, | |
| "loss": 1.1929, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.5713915522155999, | |
| "grad_norm": 0.3821208159345121, | |
| "learning_rate": 9.254417259374374e-05, | |
| "loss": 1.0911, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.5726872246696035, | |
| "grad_norm": 0.42022947326779125, | |
| "learning_rate": 9.2093221233715e-05, | |
| "loss": 1.1504, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.5739828971236072, | |
| "grad_norm": 0.4544847994091584, | |
| "learning_rate": 9.164243161800477e-05, | |
| "loss": 1.1582, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.5752785695776108, | |
| "grad_norm": 0.41094059546634143, | |
| "learning_rate": 9.119181296815085e-05, | |
| "loss": 1.1297, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.5765742420316144, | |
| "grad_norm": 0.3805641014764856, | |
| "learning_rate": 9.074137450219364e-05, | |
| "loss": 1.185, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.5778699144856181, | |
| "grad_norm": 0.39504497212816847, | |
| "learning_rate": 9.02911254344877e-05, | |
| "loss": 1.1875, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.5791655869396216, | |
| "grad_norm": 0.40451192253045454, | |
| "learning_rate": 8.984107497551311e-05, | |
| "loss": 1.1368, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.5804612593936253, | |
| "grad_norm": 0.3769782624908954, | |
| "learning_rate": 8.939123233168717e-05, | |
| "loss": 1.1391, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.5817569318476289, | |
| "grad_norm": 0.4058748960564205, | |
| "learning_rate": 8.894160670517606e-05, | |
| "loss": 1.1751, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.5830526043016325, | |
| "grad_norm": 0.3881661677768996, | |
| "learning_rate": 8.849220729370651e-05, | |
| "loss": 1.1215, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.5843482767556362, | |
| "grad_norm": 0.3807361487895863, | |
| "learning_rate": 8.804304329037771e-05, | |
| "loss": 1.1604, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.5856439492096398, | |
| "grad_norm": 0.3780073193073684, | |
| "learning_rate": 8.759412388347321e-05, | |
| "loss": 1.1159, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.5869396216636434, | |
| "grad_norm": 0.3910762750334457, | |
| "learning_rate": 8.71454582562731e-05, | |
| "loss": 1.1283, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 0.3577377830833829, | |
| "learning_rate": 8.669705558686599e-05, | |
| "loss": 1.0881, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.5895309665716507, | |
| "grad_norm": 0.38870803678431104, | |
| "learning_rate": 8.624892504796128e-05, | |
| "loss": 1.1302, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.5908266390256544, | |
| "grad_norm": 0.43127992879460963, | |
| "learning_rate": 8.580107580670163e-05, | |
| "loss": 1.183, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.592122311479658, | |
| "grad_norm": 0.37408901643032355, | |
| "learning_rate": 8.535351702447524e-05, | |
| "loss": 1.1624, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.5934179839336615, | |
| "grad_norm": 0.4003666810623533, | |
| "learning_rate": 8.49062578567287e-05, | |
| "loss": 1.1193, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.5947136563876652, | |
| "grad_norm": 0.3723417367446364, | |
| "learning_rate": 8.445930745277953e-05, | |
| "loss": 1.146, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.5960093288416688, | |
| "grad_norm": 0.35036357860872624, | |
| "learning_rate": 8.401267495562894e-05, | |
| "loss": 1.0963, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.5973050012956724, | |
| "grad_norm": 0.3969850486600124, | |
| "learning_rate": 8.356636950177509e-05, | |
| "loss": 1.1645, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.5986006737496761, | |
| "grad_norm": 0.3908066674173964, | |
| "learning_rate": 8.312040022102581e-05, | |
| "loss": 1.1573, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.5998963462036797, | |
| "grad_norm": 0.3970139193288793, | |
| "learning_rate": 8.26747762363122e-05, | |
| "loss": 1.1475, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.6011920186576833, | |
| "grad_norm": 0.3809149209886327, | |
| "learning_rate": 8.222950666350176e-05, | |
| "loss": 1.0996, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.602487691111687, | |
| "grad_norm": 0.3831688926512033, | |
| "learning_rate": 8.1784600611212e-05, | |
| "loss": 1.1586, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.6037833635656906, | |
| "grad_norm": 0.37892711696966613, | |
| "learning_rate": 8.134006718062417e-05, | |
| "loss": 1.1394, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6050790360196943, | |
| "grad_norm": 0.3796251460883534, | |
| "learning_rate": 8.089591546529686e-05, | |
| "loss": 1.1342, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.6063747084736979, | |
| "grad_norm": 0.3854947360629934, | |
| "learning_rate": 8.04521545509803e-05, | |
| "loss": 1.1234, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6076703809277014, | |
| "grad_norm": 0.39157237980540016, | |
| "learning_rate": 8.00087935154303e-05, | |
| "loss": 1.1789, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.6089660533817051, | |
| "grad_norm": 0.37973961502373016, | |
| "learning_rate": 7.956584142822248e-05, | |
| "loss": 1.1725, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6102617258357087, | |
| "grad_norm": 0.4002316998884078, | |
| "learning_rate": 7.912330735056702e-05, | |
| "loss": 1.1761, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.6115573982897123, | |
| "grad_norm": 0.3933317900533284, | |
| "learning_rate": 7.868120033512294e-05, | |
| "loss": 1.1279, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.612853070743716, | |
| "grad_norm": 0.3994518364881897, | |
| "learning_rate": 7.82395294258132e-05, | |
| "loss": 1.1292, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.6141487431977196, | |
| "grad_norm": 0.3774270174393722, | |
| "learning_rate": 7.779830365763963e-05, | |
| "loss": 1.1699, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6154444156517233, | |
| "grad_norm": 0.3936443342897645, | |
| "learning_rate": 7.735753205649798e-05, | |
| "loss": 1.1691, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.6167400881057269, | |
| "grad_norm": 0.3958990632556032, | |
| "learning_rate": 7.691722363899346e-05, | |
| "loss": 1.1053, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.6180357605597305, | |
| "grad_norm": 0.40113089490170173, | |
| "learning_rate": 7.647738741225605e-05, | |
| "loss": 1.0826, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.6193314330137342, | |
| "grad_norm": 0.39930864888851864, | |
| "learning_rate": 7.60380323737566e-05, | |
| "loss": 1.199, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.6206271054677378, | |
| "grad_norm": 0.3962521144461154, | |
| "learning_rate": 7.559916751112248e-05, | |
| "loss": 1.1933, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.6219227779217413, | |
| "grad_norm": 0.37888926408124746, | |
| "learning_rate": 7.516080180195379e-05, | |
| "loss": 1.1713, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.623218450375745, | |
| "grad_norm": 0.3708133442805237, | |
| "learning_rate": 7.472294421363982e-05, | |
| "loss": 1.2177, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.6245141228297486, | |
| "grad_norm": 0.41709292403420495, | |
| "learning_rate": 7.428560370317542e-05, | |
| "loss": 1.1678, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.6258097952837522, | |
| "grad_norm": 0.38822425697798224, | |
| "learning_rate": 7.3848789216978e-05, | |
| "loss": 1.1628, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.6271054677377559, | |
| "grad_norm": 0.38293720555111366, | |
| "learning_rate": 7.341250969070435e-05, | |
| "loss": 1.1668, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.6284011401917595, | |
| "grad_norm": 0.3982879136320185, | |
| "learning_rate": 7.297677404906787e-05, | |
| "loss": 1.1808, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.6296968126457632, | |
| "grad_norm": 0.4970736508224388, | |
| "learning_rate": 7.254159120565614e-05, | |
| "loss": 1.1915, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.6309924850997668, | |
| "grad_norm": 0.3859126967679283, | |
| "learning_rate": 7.210697006274829e-05, | |
| "loss": 1.1032, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.6322881575537704, | |
| "grad_norm": 0.3804578357151546, | |
| "learning_rate": 7.167291951113322e-05, | |
| "loss": 1.1567, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.6335838300077741, | |
| "grad_norm": 0.38190993552219266, | |
| "learning_rate": 7.123944842992759e-05, | |
| "loss": 1.091, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.6348795024617777, | |
| "grad_norm": 0.38091685226387034, | |
| "learning_rate": 7.080656568639406e-05, | |
| "loss": 1.1457, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.6361751749157812, | |
| "grad_norm": 0.3853362546606581, | |
| "learning_rate": 7.037428013576013e-05, | |
| "loss": 1.1463, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.6374708473697849, | |
| "grad_norm": 0.36970236476000445, | |
| "learning_rate": 6.994260062103674e-05, | |
| "loss": 1.1393, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.6387665198237885, | |
| "grad_norm": 0.40400326202643533, | |
| "learning_rate": 6.951153597283766e-05, | |
| "loss": 1.1295, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.6400621922777922, | |
| "grad_norm": 0.3816874564299913, | |
| "learning_rate": 6.908109500919861e-05, | |
| "loss": 1.1662, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.6413578647317958, | |
| "grad_norm": 0.3799563658685613, | |
| "learning_rate": 6.865128653539699e-05, | |
| "loss": 1.1876, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.6426535371857994, | |
| "grad_norm": 0.379580454677419, | |
| "learning_rate": 6.822211934377176e-05, | |
| "loss": 1.1005, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.6439492096398031, | |
| "grad_norm": 0.36403149625971076, | |
| "learning_rate": 6.77936022135434e-05, | |
| "loss": 1.0924, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.6452448820938067, | |
| "grad_norm": 0.3864263981944349, | |
| "learning_rate": 6.736574391063466e-05, | |
| "loss": 1.166, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.6465405545478103, | |
| "grad_norm": 0.4278354770987037, | |
| "learning_rate": 6.693855318749096e-05, | |
| "loss": 1.1463, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.647836227001814, | |
| "grad_norm": 0.3881716691518243, | |
| "learning_rate": 6.651203878290139e-05, | |
| "loss": 1.1658, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6491318994558176, | |
| "grad_norm": 0.3773480140369342, | |
| "learning_rate": 6.608620942182011e-05, | |
| "loss": 1.1561, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.6504275719098211, | |
| "grad_norm": 0.38362919406181467, | |
| "learning_rate": 6.566107381518758e-05, | |
| "loss": 1.1553, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.6517232443638248, | |
| "grad_norm": 0.3889927393341413, | |
| "learning_rate": 6.523664065975268e-05, | |
| "loss": 1.0927, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.6530189168178284, | |
| "grad_norm": 0.3770322611579158, | |
| "learning_rate": 6.481291863789461e-05, | |
| "loss": 1.1621, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.6543145892718321, | |
| "grad_norm": 0.3583870387014761, | |
| "learning_rate": 6.43899164174453e-05, | |
| "loss": 1.1688, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.6556102617258357, | |
| "grad_norm": 0.36282391266493386, | |
| "learning_rate": 6.396764265151221e-05, | |
| "loss": 1.0909, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.6569059341798393, | |
| "grad_norm": 0.40358089248833323, | |
| "learning_rate": 6.35461059783011e-05, | |
| "loss": 1.185, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.658201606633843, | |
| "grad_norm": 0.3622883465103346, | |
| "learning_rate": 6.312531502093958e-05, | |
| "loss": 1.1586, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.6594972790878466, | |
| "grad_norm": 0.39189718464338913, | |
| "learning_rate": 6.270527838730053e-05, | |
| "loss": 1.1592, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.6607929515418502, | |
| "grad_norm": 0.36779638890391525, | |
| "learning_rate": 6.228600466982611e-05, | |
| "loss": 1.156, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.6620886239958539, | |
| "grad_norm": 0.37597682220271855, | |
| "learning_rate": 6.186750244535194e-05, | |
| "loss": 1.1353, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.6633842964498575, | |
| "grad_norm": 0.3886787309603002, | |
| "learning_rate": 6.144978027493158e-05, | |
| "loss": 1.1341, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.664679968903861, | |
| "grad_norm": 0.35942315304405126, | |
| "learning_rate": 6.103284670366162e-05, | |
| "loss": 1.1597, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.6659756413578647, | |
| "grad_norm": 0.41146010175047676, | |
| "learning_rate": 6.061671026050668e-05, | |
| "loss": 1.18, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.6672713138118683, | |
| "grad_norm": 0.38590903226923434, | |
| "learning_rate": 6.0201379458124964e-05, | |
| "loss": 1.1639, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.668566986265872, | |
| "grad_norm": 0.386535230930437, | |
| "learning_rate": 5.978686279269421e-05, | |
| "loss": 1.0866, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.6698626587198756, | |
| "grad_norm": 0.3877441934910258, | |
| "learning_rate": 5.9373168743737864e-05, | |
| "loss": 1.1325, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.6711583311738792, | |
| "grad_norm": 0.3990007152806006, | |
| "learning_rate": 5.896030577395144e-05, | |
| "loss": 1.1188, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.6724540036278829, | |
| "grad_norm": 0.3970387094956777, | |
| "learning_rate": 5.854828232902976e-05, | |
| "loss": 1.1939, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.6737496760818865, | |
| "grad_norm": 0.39839395255304766, | |
| "learning_rate": 5.813710683749379e-05, | |
| "loss": 1.2078, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.6750453485358902, | |
| "grad_norm": 0.39807324424891044, | |
| "learning_rate": 5.772678771051858e-05, | |
| "loss": 1.1513, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.6763410209898938, | |
| "grad_norm": 0.371233691663641, | |
| "learning_rate": 5.7317333341760906e-05, | |
| "loss": 1.1896, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.6776366934438974, | |
| "grad_norm": 0.3914320864664493, | |
| "learning_rate": 5.690875210718778e-05, | |
| "loss": 1.144, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.678932365897901, | |
| "grad_norm": 0.38952180289646227, | |
| "learning_rate": 5.6501052364904906e-05, | |
| "loss": 1.1669, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.6802280383519046, | |
| "grad_norm": 0.3623267889899647, | |
| "learning_rate": 5.6094242454986e-05, | |
| "loss": 1.179, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.6815237108059082, | |
| "grad_norm": 0.3637529461983289, | |
| "learning_rate": 5.568833069930186e-05, | |
| "loss": 1.1244, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.6828193832599119, | |
| "grad_norm": 0.387937514120572, | |
| "learning_rate": 5.528332540135031e-05, | |
| "loss": 1.1699, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.6841150557139155, | |
| "grad_norm": 0.37878126929029915, | |
| "learning_rate": 5.487923484608629e-05, | |
| "loss": 1.131, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.6854107281679191, | |
| "grad_norm": 0.3903173526956151, | |
| "learning_rate": 5.4476067299752385e-05, | |
| "loss": 1.1165, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.6867064006219228, | |
| "grad_norm": 0.3857342653067374, | |
| "learning_rate": 5.4073831009709805e-05, | |
| "loss": 1.0854, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.6880020730759264, | |
| "grad_norm": 0.3661818366505174, | |
| "learning_rate": 5.367253420426952e-05, | |
| "loss": 1.173, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.6892977455299301, | |
| "grad_norm": 0.3695899768899783, | |
| "learning_rate": 5.3272185092524004e-05, | |
| "loss": 1.1917, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.6905934179839337, | |
| "grad_norm": 0.3709048127868108, | |
| "learning_rate": 5.287279186417938e-05, | |
| "loss": 1.1595, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.6918890904379373, | |
| "grad_norm": 0.388715290668864, | |
| "learning_rate": 5.2474362689387745e-05, | |
| "loss": 1.1681, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.6931847628919409, | |
| "grad_norm": 0.41572003898003435, | |
| "learning_rate": 5.207690571858025e-05, | |
| "loss": 1.1344, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.6944804353459445, | |
| "grad_norm": 0.3688637831571822, | |
| "learning_rate": 5.1680429082300134e-05, | |
| "loss": 1.1911, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.6957761077999481, | |
| "grad_norm": 0.3793170468762598, | |
| "learning_rate": 5.128494089103652e-05, | |
| "loss": 1.1127, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.6970717802539518, | |
| "grad_norm": 0.43134296541622413, | |
| "learning_rate": 5.0890449235058525e-05, | |
| "loss": 1.1784, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.6983674527079554, | |
| "grad_norm": 0.3635096529171273, | |
| "learning_rate": 5.049696218424969e-05, | |
| "loss": 1.1428, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.699663125161959, | |
| "grad_norm": 0.3878749986675832, | |
| "learning_rate": 5.010448778794303e-05, | |
| "loss": 1.1741, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7009587976159627, | |
| "grad_norm": 0.40169342624996973, | |
| "learning_rate": 4.971303407475618e-05, | |
| "loss": 1.1599, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.7022544700699663, | |
| "grad_norm": 0.3694514584240007, | |
| "learning_rate": 4.932260905242731e-05, | |
| "loss": 1.1187, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.70355014252397, | |
| "grad_norm": 0.40107470744868656, | |
| "learning_rate": 4.893322070765126e-05, | |
| "loss": 1.1087, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.7048458149779736, | |
| "grad_norm": 0.3973309016316779, | |
| "learning_rate": 4.8544877005916126e-05, | |
| "loss": 1.1353, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.7061414874319772, | |
| "grad_norm": 0.36945482581363814, | |
| "learning_rate": 4.815758589134046e-05, | |
| "loss": 1.1537, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.7074371598859808, | |
| "grad_norm": 0.39405014852767495, | |
| "learning_rate": 4.777135528651058e-05, | |
| "loss": 1.1864, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.7087328323399844, | |
| "grad_norm": 0.36438744458294, | |
| "learning_rate": 4.738619309231857e-05, | |
| "loss": 1.1571, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.710028504793988, | |
| "grad_norm": 0.3718851263620155, | |
| "learning_rate": 4.700210718780072e-05, | |
| "loss": 1.1375, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.7113241772479917, | |
| "grad_norm": 0.3968100064213781, | |
| "learning_rate": 4.6619105429976193e-05, | |
| "loss": 1.0992, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.7126198497019953, | |
| "grad_norm": 0.3735407865710053, | |
| "learning_rate": 4.623719565368657e-05, | |
| "loss": 1.154, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.713915522155999, | |
| "grad_norm": 0.3855769481718261, | |
| "learning_rate": 4.585638567143529e-05, | |
| "loss": 1.1087, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.7152111946100026, | |
| "grad_norm": 0.39904660177632, | |
| "learning_rate": 4.547668327322796e-05, | |
| "loss": 1.1557, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.7165068670640062, | |
| "grad_norm": 0.37552561838186715, | |
| "learning_rate": 4.5098096226413e-05, | |
| "loss": 1.1788, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.7178025395180099, | |
| "grad_norm": 0.39324730815800873, | |
| "learning_rate": 4.472063227552274e-05, | |
| "loss": 1.1688, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.7190982119720135, | |
| "grad_norm": 0.38301296634310333, | |
| "learning_rate": 4.434429914211508e-05, | |
| "loss": 1.1711, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.7203938844260172, | |
| "grad_norm": 0.3709443062095975, | |
| "learning_rate": 4.396910452461532e-05, | |
| "loss": 1.1718, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.7216895568800207, | |
| "grad_norm": 0.39239048686699185, | |
| "learning_rate": 4.3595056098158906e-05, | |
| "loss": 1.1574, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.7229852293340243, | |
| "grad_norm": 0.41357484414155377, | |
| "learning_rate": 4.322216151443428e-05, | |
| "loss": 1.1074, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.724280901788028, | |
| "grad_norm": 0.3672227895875153, | |
| "learning_rate": 4.2850428401526376e-05, | |
| "loss": 1.1028, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.7255765742420316, | |
| "grad_norm": 0.3664872416848814, | |
| "learning_rate": 4.2479864363760726e-05, | |
| "loss": 1.1508, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7268722466960352, | |
| "grad_norm": 0.3710822782612391, | |
| "learning_rate": 4.211047698154765e-05, | |
| "loss": 1.1369, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.7281679191500389, | |
| "grad_norm": 0.38199563385579133, | |
| "learning_rate": 4.1742273811227395e-05, | |
| "loss": 1.1716, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.7294635916040425, | |
| "grad_norm": 0.36978932404422615, | |
| "learning_rate": 4.1375262384915433e-05, | |
| "loss": 1.1613, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.7307592640580461, | |
| "grad_norm": 0.37568319756030377, | |
| "learning_rate": 4.100945021034843e-05, | |
| "loss": 1.1396, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.7320549365120498, | |
| "grad_norm": 0.3802598263999195, | |
| "learning_rate": 4.064484477073074e-05, | |
| "loss": 1.172, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.7333506089660534, | |
| "grad_norm": 0.3690252573947318, | |
| "learning_rate": 4.028145352458118e-05, | |
| "loss": 1.1413, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.7346462814200571, | |
| "grad_norm": 0.3942754823686454, | |
| "learning_rate": 3.991928390558054e-05, | |
| "loss": 1.1069, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.7359419538740606, | |
| "grad_norm": 0.3720821371337478, | |
| "learning_rate": 3.95583433224195e-05, | |
| "loss": 1.0835, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.7372376263280642, | |
| "grad_norm": 0.3807303246565132, | |
| "learning_rate": 3.9198639158647056e-05, | |
| "loss": 1.1501, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.7385332987820679, | |
| "grad_norm": 0.36231750839777666, | |
| "learning_rate": 3.884017877251959e-05, | |
| "loss": 1.0563, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.7398289712360715, | |
| "grad_norm": 0.388874708381432, | |
| "learning_rate": 3.8482969496850166e-05, | |
| "loss": 1.0889, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.7411246436900751, | |
| "grad_norm": 0.38013479771110215, | |
| "learning_rate": 3.812701863885865e-05, | |
| "loss": 1.1145, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.7424203161440788, | |
| "grad_norm": 0.3717583300327742, | |
| "learning_rate": 3.7772333480022185e-05, | |
| "loss": 1.0663, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.7437159885980824, | |
| "grad_norm": 0.36405208418925755, | |
| "learning_rate": 3.741892127592625e-05, | |
| "loss": 1.1102, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.745011661052086, | |
| "grad_norm": 0.3723066034084609, | |
| "learning_rate": 3.706678925611629e-05, | |
| "loss": 1.1602, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.7463073335060897, | |
| "grad_norm": 0.3783951434167031, | |
| "learning_rate": 3.67159446239497e-05, | |
| "loss": 1.1233, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.7476030059600933, | |
| "grad_norm": 0.37951067018123125, | |
| "learning_rate": 3.636639455644858e-05, | |
| "loss": 1.0545, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 0.748898678414097, | |
| "grad_norm": 0.37421338215744115, | |
| "learning_rate": 3.601814620415296e-05, | |
| "loss": 1.1367, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.7501943508681005, | |
| "grad_norm": 0.3603159123651838, | |
| "learning_rate": 3.567120669097422e-05, | |
| "loss": 1.0909, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.7514900233221041, | |
| "grad_norm": 0.3788231837374063, | |
| "learning_rate": 3.532558311404986e-05, | |
| "loss": 1.1441, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.7527856957761078, | |
| "grad_norm": 0.3840910679589615, | |
| "learning_rate": 3.498128254359788e-05, | |
| "loss": 1.1403, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.7540813682301114, | |
| "grad_norm": 0.3753865209395276, | |
| "learning_rate": 3.4638312022772335e-05, | |
| "loss": 1.1004, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.755377040684115, | |
| "grad_norm": 0.3954242259257984, | |
| "learning_rate": 3.4296678567519345e-05, | |
| "loss": 1.1347, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 0.7566727131381187, | |
| "grad_norm": 0.3741932569564154, | |
| "learning_rate": 3.3956389166433276e-05, | |
| "loss": 1.0908, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.7579683855921223, | |
| "grad_norm": 0.3930950040373874, | |
| "learning_rate": 3.361745078061416e-05, | |
| "loss": 1.1988, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.759264058046126, | |
| "grad_norm": 0.3685363645101882, | |
| "learning_rate": 3.327987034352499e-05, | |
| "loss": 1.2067, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.7605597305001296, | |
| "grad_norm": 0.3595878774367224, | |
| "learning_rate": 3.294365476085001e-05, | |
| "loss": 1.1128, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 0.7618554029541332, | |
| "grad_norm": 0.3823858203204671, | |
| "learning_rate": 3.260881091035356e-05, | |
| "loss": 1.1894, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.7631510754081369, | |
| "grad_norm": 0.35550275725905045, | |
| "learning_rate": 3.227534564173903e-05, | |
| "loss": 1.0744, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 0.7644467478621404, | |
| "grad_norm": 0.38923682484253824, | |
| "learning_rate": 3.1943265776509215e-05, | |
| "loss": 1.1599, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.765742420316144, | |
| "grad_norm": 0.3789384247163881, | |
| "learning_rate": 3.1612578107826375e-05, | |
| "loss": 1.1504, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.7670380927701477, | |
| "grad_norm": 0.3620246203990784, | |
| "learning_rate": 3.128328940037345e-05, | |
| "loss": 1.0777, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.7683337652241513, | |
| "grad_norm": 0.36815557923240855, | |
| "learning_rate": 3.095540639021578e-05, | |
| "loss": 1.1174, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 0.769629437678155, | |
| "grad_norm": 0.3547702710213197, | |
| "learning_rate": 3.0628935784662947e-05, | |
| "loss": 1.1485, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.7709251101321586, | |
| "grad_norm": 0.3781571691176777, | |
| "learning_rate": 3.030388426213202e-05, | |
| "loss": 1.1883, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.7722207825861622, | |
| "grad_norm": 0.40077870265330406, | |
| "learning_rate": 2.9980258472010624e-05, | |
| "loss": 1.1932, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.7735164550401659, | |
| "grad_norm": 0.4044666107500387, | |
| "learning_rate": 2.9658065034520978e-05, | |
| "loss": 1.1241, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.7748121274941695, | |
| "grad_norm": 0.38599774202480436, | |
| "learning_rate": 2.9337310540584662e-05, | |
| "loss": 1.1289, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.7761077999481731, | |
| "grad_norm": 0.3937959908311204, | |
| "learning_rate": 2.90180015516874e-05, | |
| "loss": 1.124, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 0.7774034724021768, | |
| "grad_norm": 0.3632550787528134, | |
| "learning_rate": 2.8700144599745304e-05, | |
| "loss": 1.054, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.7786991448561803, | |
| "grad_norm": 0.3622147641579977, | |
| "learning_rate": 2.8383746186970885e-05, | |
| "loss": 1.1541, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 0.7799948173101839, | |
| "grad_norm": 0.37112400149549435, | |
| "learning_rate": 2.806881278574016e-05, | |
| "loss": 1.0959, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.7812904897641876, | |
| "grad_norm": 0.35896620554471, | |
| "learning_rate": 2.7755350838460437e-05, | |
| "loss": 1.1097, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.7825861622181912, | |
| "grad_norm": 0.3722645573928852, | |
| "learning_rate": 2.7443366757438084e-05, | |
| "loss": 1.1536, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.7838818346721949, | |
| "grad_norm": 0.3733728824194641, | |
| "learning_rate": 2.71328669247479e-05, | |
| "loss": 1.0988, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.7851775071261985, | |
| "grad_norm": 0.39726305827044567, | |
| "learning_rate": 2.6823857692102115e-05, | |
| "loss": 1.1347, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.7864731795802021, | |
| "grad_norm": 0.3757164187600116, | |
| "learning_rate": 2.6516345380720685e-05, | |
| "loss": 1.1498, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 0.7877688520342058, | |
| "grad_norm": 0.3795959004315457, | |
| "learning_rate": 2.6210336281201996e-05, | |
| "loss": 1.1716, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.7890645244882094, | |
| "grad_norm": 0.35714774223397056, | |
| "learning_rate": 2.5905836653393955e-05, | |
| "loss": 1.1829, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.790360196942213, | |
| "grad_norm": 0.3654895953286083, | |
| "learning_rate": 2.5602852726266246e-05, | |
| "loss": 1.1138, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.7916558693962167, | |
| "grad_norm": 0.3790228967067868, | |
| "learning_rate": 2.53013906977827e-05, | |
| "loss": 1.1361, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 0.7929515418502202, | |
| "grad_norm": 0.3766214688716055, | |
| "learning_rate": 2.500145673477452e-05, | |
| "loss": 1.1386, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.7942472143042238, | |
| "grad_norm": 0.35689403644408657, | |
| "learning_rate": 2.4703056972814298e-05, | |
| "loss": 1.1716, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 0.7955428867582275, | |
| "grad_norm": 0.3837337630770634, | |
| "learning_rate": 2.44061975160902e-05, | |
| "loss": 1.1253, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.7968385592122311, | |
| "grad_norm": 0.3617908256654209, | |
| "learning_rate": 2.4110884437281433e-05, | |
| "loss": 1.1669, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.7981342316662348, | |
| "grad_norm": 0.35978473851190906, | |
| "learning_rate": 2.381712377743379e-05, | |
| "loss": 1.1195, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.7994299041202384, | |
| "grad_norm": 0.36747445883447966, | |
| "learning_rate": 2.352492154583611e-05, | |
| "loss": 1.1494, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 0.800725576574242, | |
| "grad_norm": 0.38717115547376924, | |
| "learning_rate": 2.323428371989752e-05, | |
| "loss": 1.1342, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.8020212490282457, | |
| "grad_norm": 0.39093054065016897, | |
| "learning_rate": 2.2945216245024804e-05, | |
| "loss": 1.1304, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 0.8033169214822493, | |
| "grad_norm": 0.368756140981371, | |
| "learning_rate": 2.265772503450122e-05, | |
| "loss": 1.1902, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.804612593936253, | |
| "grad_norm": 0.3877482016759796, | |
| "learning_rate": 2.237181596936515e-05, | |
| "loss": 1.2071, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.8059082663902566, | |
| "grad_norm": 0.37792584298436904, | |
| "learning_rate": 2.2087494898290084e-05, | |
| "loss": 1.1263, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.8072039388442601, | |
| "grad_norm": 0.377175581709425, | |
| "learning_rate": 2.1804767637464783e-05, | |
| "loss": 1.1409, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.8084996112982638, | |
| "grad_norm": 0.3675212675701305, | |
| "learning_rate": 2.152363997047432e-05, | |
| "loss": 1.1222, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.8097952837522674, | |
| "grad_norm": 0.38222618034509426, | |
| "learning_rate": 2.1244117648181926e-05, | |
| "loss": 1.1282, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.811090956206271, | |
| "grad_norm": 0.36558116292433085, | |
| "learning_rate": 2.0966206388611177e-05, | |
| "loss": 1.1599, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.8123866286602747, | |
| "grad_norm": 0.3727524846387591, | |
| "learning_rate": 2.0689911876829127e-05, | |
| "loss": 1.1496, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.8136823011142783, | |
| "grad_norm": 0.3628037534722254, | |
| "learning_rate": 2.0415239764829976e-05, | |
| "loss": 1.1416, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.8149779735682819, | |
| "grad_norm": 0.3761023568840631, | |
| "learning_rate": 2.014219567141932e-05, | |
| "loss": 1.0828, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 0.8162736460222856, | |
| "grad_norm": 0.3504341651004141, | |
| "learning_rate": 1.9870785182099505e-05, | |
| "loss": 1.0956, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.8175693184762892, | |
| "grad_norm": 0.3734885666536526, | |
| "learning_rate": 1.960101384895511e-05, | |
| "loss": 1.1044, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 0.8188649909302929, | |
| "grad_norm": 0.386535956936462, | |
| "learning_rate": 1.9332887190539516e-05, | |
| "loss": 1.2004, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.8201606633842965, | |
| "grad_norm": 0.3646373120550804, | |
| "learning_rate": 1.9066410691761937e-05, | |
| "loss": 1.1377, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.8214563358383, | |
| "grad_norm": 0.3715486267985764, | |
| "learning_rate": 1.8801589803775154e-05, | |
| "loss": 1.1128, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.8227520082923037, | |
| "grad_norm": 0.3789134249781072, | |
| "learning_rate": 1.8538429943864244e-05, | |
| "loss": 1.1429, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.8240476807463073, | |
| "grad_norm": 0.4352073640959862, | |
| "learning_rate": 1.8276936495335485e-05, | |
| "loss": 1.1167, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.8253433532003109, | |
| "grad_norm": 0.3681435590124036, | |
| "learning_rate": 1.8017114807406478e-05, | |
| "loss": 1.1063, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 0.8266390256543146, | |
| "grad_norm": 0.3694513270515317, | |
| "learning_rate": 1.775897019509649e-05, | |
| "loss": 1.1836, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.8279346981083182, | |
| "grad_norm": 0.388666401561133, | |
| "learning_rate": 1.7502507939117897e-05, | |
| "loss": 1.1396, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.8292303705623219, | |
| "grad_norm": 0.3687281974613644, | |
| "learning_rate": 1.7247733285768098e-05, | |
| "loss": 1.1352, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.8305260430163255, | |
| "grad_norm": 0.3733462546424826, | |
| "learning_rate": 1.6994651446822153e-05, | |
| "loss": 1.125, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 0.8318217154703291, | |
| "grad_norm": 0.3745095547011446, | |
| "learning_rate": 1.6743267599426303e-05, | |
| "loss": 1.1645, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.8331173879243328, | |
| "grad_norm": 0.36579848745894494, | |
| "learning_rate": 1.649358688599191e-05, | |
| "loss": 1.1373, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 0.8344130603783364, | |
| "grad_norm": 0.37060439276258045, | |
| "learning_rate": 1.624561441409034e-05, | |
| "loss": 1.0775, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.83570873283234, | |
| "grad_norm": 0.38418584872461575, | |
| "learning_rate": 1.5999355256348448e-05, | |
| "loss": 1.1678, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.8370044052863436, | |
| "grad_norm": 0.3814911739387983, | |
| "learning_rate": 1.5754814450344845e-05, | |
| "loss": 1.1772, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.8383000777403472, | |
| "grad_norm": 0.3642771788771785, | |
| "learning_rate": 1.5511996998506883e-05, | |
| "loss": 1.1279, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 0.8395957501943508, | |
| "grad_norm": 0.3798091083229991, | |
| "learning_rate": 1.527090786800821e-05, | |
| "loss": 1.1663, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.8408914226483545, | |
| "grad_norm": 0.3621301040720381, | |
| "learning_rate": 1.5031551990667236e-05, | |
| "loss": 1.0961, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 0.8421870951023581, | |
| "grad_norm": 0.3716900347036682, | |
| "learning_rate": 1.4793934262846232e-05, | |
| "loss": 1.1246, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.8434827675563618, | |
| "grad_norm": 0.3431347363725958, | |
| "learning_rate": 1.4558059545351143e-05, | |
| "loss": 1.126, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.8447784400103654, | |
| "grad_norm": 0.38516161930376763, | |
| "learning_rate": 1.4323932663332251e-05, | |
| "loss": 1.1487, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.846074112464369, | |
| "grad_norm": 0.36582214467557017, | |
| "learning_rate": 1.4091558406185335e-05, | |
| "loss": 1.1464, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 0.8473697849183727, | |
| "grad_norm": 0.3600375664520519, | |
| "learning_rate": 1.3860941527453786e-05, | |
| "loss": 1.1331, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.8486654573723763, | |
| "grad_norm": 0.36937163487487523, | |
| "learning_rate": 1.3632086744731299e-05, | |
| "loss": 1.1, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.84996112982638, | |
| "grad_norm": 0.37811170010742284, | |
| "learning_rate": 1.3404998739565433e-05, | |
| "loss": 1.1278, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.8512568022803835, | |
| "grad_norm": 0.36422197248748484, | |
| "learning_rate": 1.3179682157361872e-05, | |
| "loss": 1.1193, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.8525524747343871, | |
| "grad_norm": 0.37165480607350426, | |
| "learning_rate": 1.2956141607289262e-05, | |
| "loss": 1.1615, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.8538481471883907, | |
| "grad_norm": 0.37701311154160283, | |
| "learning_rate": 1.2734381662185035e-05, | |
| "loss": 1.1036, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 0.8551438196423944, | |
| "grad_norm": 0.3815170151959314, | |
| "learning_rate": 1.2514406858461847e-05, | |
| "loss": 1.1735, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.856439492096398, | |
| "grad_norm": 0.37919701821847407, | |
| "learning_rate": 1.2296221696014732e-05, | |
| "loss": 1.1837, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 0.8577351645504017, | |
| "grad_norm": 0.36743928128733283, | |
| "learning_rate": 1.2079830638129164e-05, | |
| "loss": 1.1148, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.8590308370044053, | |
| "grad_norm": 0.3738619978563127, | |
| "learning_rate": 1.1865238111389588e-05, | |
| "loss": 1.1604, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.8603265094584089, | |
| "grad_norm": 0.3541904420302459, | |
| "learning_rate": 1.1652448505588998e-05, | |
| "loss": 1.1544, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.8616221819124126, | |
| "grad_norm": 0.36684430135825935, | |
| "learning_rate": 1.1441466173639092e-05, | |
| "loss": 1.156, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.8629178543664162, | |
| "grad_norm": 0.37365304474041194, | |
| "learning_rate": 1.1232295431481222e-05, | |
| "loss": 1.1084, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.8642135268204199, | |
| "grad_norm": 0.3810375689519541, | |
| "learning_rate": 1.1024940557998143e-05, | |
| "loss": 1.1386, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 0.8655091992744234, | |
| "grad_norm": 0.3673719354100514, | |
| "learning_rate": 1.0819405794926418e-05, | |
| "loss": 1.1629, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.866804871728427, | |
| "grad_norm": 0.37342554741406375, | |
| "learning_rate": 1.0615695346769693e-05, | |
| "loss": 1.1134, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.8681005441824307, | |
| "grad_norm": 0.3635218597663001, | |
| "learning_rate": 1.0413813380712701e-05, | |
| "loss": 1.1147, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.8693962166364343, | |
| "grad_norm": 0.3570736451827848, | |
| "learning_rate": 1.0213764026535921e-05, | |
| "loss": 1.1321, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 0.8706918890904379, | |
| "grad_norm": 0.3891720605828237, | |
| "learning_rate": 1.0015551376531296e-05, | |
| "loss": 1.1001, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.8719875615444416, | |
| "grad_norm": 0.37097051363645606, | |
| "learning_rate": 9.819179485418273e-06, | |
| "loss": 1.1241, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 0.8732832339984452, | |
| "grad_norm": 0.3917978913676709, | |
| "learning_rate": 9.624652370261034e-06, | |
| "loss": 1.1304, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.8745789064524488, | |
| "grad_norm": 0.37226262222810586, | |
| "learning_rate": 9.431974010386258e-06, | |
| "loss": 1.1421, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.8758745789064525, | |
| "grad_norm": 0.3751233666218166, | |
| "learning_rate": 9.24114834730171e-06, | |
| "loss": 1.1022, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.8771702513604561, | |
| "grad_norm": 0.36555054148949856, | |
| "learning_rate": 9.052179284615658e-06, | |
| "loss": 1.13, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 0.8784659238144598, | |
| "grad_norm": 0.38148359927958986, | |
| "learning_rate": 8.865070687956977e-06, | |
| "loss": 1.1894, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.8797615962684633, | |
| "grad_norm": 0.416874595298691, | |
| "learning_rate": 8.679826384896061e-06, | |
| "loss": 1.1736, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 0.8810572687224669, | |
| "grad_norm": 0.387504501992165, | |
| "learning_rate": 8.496450164866565e-06, | |
| "loss": 1.1511, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 0.34801518476142257, | |
| "learning_rate": 8.314945779087858e-06, | |
| "loss": 1.1224, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.8836486136304742, | |
| "grad_norm": 0.3758149676256447, | |
| "learning_rate": 8.135316940488347e-06, | |
| "loss": 1.1448, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.8849442860844778, | |
| "grad_norm": 0.36726096701274225, | |
| "learning_rate": 7.9575673236294e-06, | |
| "loss": 1.1719, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 0.8862399585384815, | |
| "grad_norm": 0.3578469295777726, | |
| "learning_rate": 7.781700564630311e-06, | |
| "loss": 1.1385, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.8875356309924851, | |
| "grad_norm": 0.38263190643853745, | |
| "learning_rate": 7.6077202610938205e-06, | |
| "loss": 1.177, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.8888313034464888, | |
| "grad_norm": 0.38665333786102024, | |
| "learning_rate": 7.435629972032565e-06, | |
| "loss": 1.1358, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.8901269759004924, | |
| "grad_norm": 0.36097779607915553, | |
| "learning_rate": 7.265433217796302e-06, | |
| "loss": 1.1055, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.891422648354496, | |
| "grad_norm": 0.4052378204511826, | |
| "learning_rate": 7.0971334799998404e-06, | |
| "loss": 1.0912, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.8927183208084997, | |
| "grad_norm": 0.3704412054341248, | |
| "learning_rate": 6.930734201451816e-06, | |
| "loss": 1.1591, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 0.8940139932625032, | |
| "grad_norm": 0.37555016415939446, | |
| "learning_rate": 6.7662387860843225e-06, | |
| "loss": 1.1149, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.8953096657165068, | |
| "grad_norm": 0.3798653467067839, | |
| "learning_rate": 6.603650598883226e-06, | |
| "loss": 1.0958, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 0.8966053381705105, | |
| "grad_norm": 0.34342261742501184, | |
| "learning_rate": 6.4429729658193714e-06, | |
| "loss": 1.0718, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.8979010106245141, | |
| "grad_norm": 0.38053269516746213, | |
| "learning_rate": 6.284209173780498e-06, | |
| "loss": 1.1133, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.8991966830785177, | |
| "grad_norm": 0.38564284280238426, | |
| "learning_rate": 6.127362470504027e-06, | |
| "loss": 1.1239, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.9004923555325214, | |
| "grad_norm": 0.3624213790138937, | |
| "learning_rate": 5.972436064510667e-06, | |
| "loss": 1.1766, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.901788027986525, | |
| "grad_norm": 0.36614157981464224, | |
| "learning_rate": 5.819433125038643e-06, | |
| "loss": 1.146, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.9030837004405287, | |
| "grad_norm": 0.39479288263797235, | |
| "learning_rate": 5.668356781979056e-06, | |
| "loss": 1.1669, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 0.9043793728945323, | |
| "grad_norm": 0.35385937176801885, | |
| "learning_rate": 5.519210125811713e-06, | |
| "loss": 1.1224, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.9056750453485359, | |
| "grad_norm": 0.3778900407632985, | |
| "learning_rate": 5.371996207541941e-06, | |
| "loss": 1.1287, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 0.9069707178025396, | |
| "grad_norm": 0.35939889684365445, | |
| "learning_rate": 5.226718038638234e-06, | |
| "loss": 1.1274, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.9082663902565431, | |
| "grad_norm": 0.38058001351217496, | |
| "learning_rate": 5.083378590970511e-06, | |
| "loss": 1.154, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 0.9095620627105467, | |
| "grad_norm": 0.36339105009188977, | |
| "learning_rate": 4.941980796749524e-06, | |
| "loss": 1.1234, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.9108577351645504, | |
| "grad_norm": 0.3753439481173232, | |
| "learning_rate": 4.802527548466684e-06, | |
| "loss": 1.0982, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 0.912153407618554, | |
| "grad_norm": 0.3547606097647797, | |
| "learning_rate": 4.665021698834981e-06, | |
| "loss": 1.1405, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.9134490800725577, | |
| "grad_norm": 0.3804573132062168, | |
| "learning_rate": 4.529466060730681e-06, | |
| "loss": 1.1529, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.9147447525265613, | |
| "grad_norm": 0.3766860837932323, | |
| "learning_rate": 4.395863407135614e-06, | |
| "loss": 1.0821, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.9160404249805649, | |
| "grad_norm": 0.3760175631598513, | |
| "learning_rate": 4.264216471080651e-06, | |
| "loss": 1.1451, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 0.9173360974345686, | |
| "grad_norm": 0.3901693827353405, | |
| "learning_rate": 4.134527945589639e-06, | |
| "loss": 1.1148, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.9186317698885722, | |
| "grad_norm": 0.4488796382773471, | |
| "learning_rate": 4.006800483624396e-06, | |
| "loss": 1.1614, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 0.9199274423425758, | |
| "grad_norm": 0.40897274265302724, | |
| "learning_rate": 3.881036698030449e-06, | |
| "loss": 1.1744, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.9212231147965795, | |
| "grad_norm": 0.3690887838305789, | |
| "learning_rate": 3.7572391614834833e-06, | |
| "loss": 1.136, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 0.922518787250583, | |
| "grad_norm": 0.3688927093580592, | |
| "learning_rate": 3.6354104064368566e-06, | |
| "loss": 1.0947, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.9238144597045866, | |
| "grad_norm": 0.3826643203341735, | |
| "learning_rate": 3.515552925069676e-06, | |
| "loss": 1.1757, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 0.9251101321585903, | |
| "grad_norm": 0.3684225941093978, | |
| "learning_rate": 3.397669169235862e-06, | |
| "loss": 1.1574, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.9264058046125939, | |
| "grad_norm": 0.3447005114527934, | |
| "learning_rate": 3.281761550414042e-06, | |
| "loss": 1.0847, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.9277014770665976, | |
| "grad_norm": 0.38643242398136646, | |
| "learning_rate": 3.1678324396581137e-06, | |
| "loss": 1.2221, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.9289971495206012, | |
| "grad_norm": 0.37779005091381196, | |
| "learning_rate": 3.0558841675488393e-06, | |
| "loss": 1.1612, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 0.9302928219746048, | |
| "grad_norm": 0.38097475759248384, | |
| "learning_rate": 2.9459190241461043e-06, | |
| "loss": 1.1267, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.9315884944286085, | |
| "grad_norm": 0.37618918488273334, | |
| "learning_rate": 2.8379392589421237e-06, | |
| "loss": 1.1563, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 0.9328841668826121, | |
| "grad_norm": 0.3621451372937871, | |
| "learning_rate": 2.7319470808153892e-06, | |
| "loss": 1.1197, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.9341798393366157, | |
| "grad_norm": 0.35758689530964316, | |
| "learning_rate": 2.6279446579854707e-06, | |
| "loss": 1.1446, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 0.9354755117906194, | |
| "grad_norm": 0.3803519145314357, | |
| "learning_rate": 2.525934117968731e-06, | |
| "loss": 1.1823, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.9367711842446229, | |
| "grad_norm": 0.36540749180301496, | |
| "learning_rate": 2.4259175475347172e-06, | |
| "loss": 1.1748, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 0.9380668566986265, | |
| "grad_norm": 0.34819215016667493, | |
| "learning_rate": 2.3278969926635252e-06, | |
| "loss": 1.1036, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.9393625291526302, | |
| "grad_norm": 0.3586884150884424, | |
| "learning_rate": 2.2318744585039796e-06, | |
| "loss": 1.1192, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.9406582016066338, | |
| "grad_norm": 0.37877938188955895, | |
| "learning_rate": 2.1378519093324776e-06, | |
| "loss": 1.128, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.9419538740606375, | |
| "grad_norm": 0.3781105512240613, | |
| "learning_rate": 2.0458312685129876e-06, | |
| "loss": 1.1612, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 0.9432495465146411, | |
| "grad_norm": 0.37487413050848234, | |
| "learning_rate": 1.955814418457591e-06, | |
| "loss": 1.1396, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.9445452189686447, | |
| "grad_norm": 0.3840114910436269, | |
| "learning_rate": 1.867803200587981e-06, | |
| "loss": 1.1129, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 0.9458408914226484, | |
| "grad_norm": 0.3719048343830175, | |
| "learning_rate": 1.7817994152978468e-06, | |
| "loss": 1.1157, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.947136563876652, | |
| "grad_norm": 0.3772856287062892, | |
| "learning_rate": 1.6978048219159714e-06, | |
| "loss": 1.1194, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 0.9484322363306557, | |
| "grad_norm": 0.3598408324385285, | |
| "learning_rate": 1.6158211386703259e-06, | |
| "loss": 1.1284, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.9497279087846593, | |
| "grad_norm": 0.365397297934618, | |
| "learning_rate": 1.5358500426528422e-06, | |
| "loss": 1.1282, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 0.9510235812386628, | |
| "grad_norm": 0.3659664905751428, | |
| "learning_rate": 1.4578931697851406e-06, | |
| "loss": 1.1051, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.9523192536926665, | |
| "grad_norm": 0.3549703867179501, | |
| "learning_rate": 1.3819521147851123e-06, | |
| "loss": 1.1292, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.9536149261466701, | |
| "grad_norm": 0.36856057544037374, | |
| "learning_rate": 1.3080284311341674e-06, | |
| "loss": 1.141, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.9549105986006737, | |
| "grad_norm": 0.3869169065095209, | |
| "learning_rate": 1.236123631045627e-06, | |
| "loss": 1.1622, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 0.9562062710546774, | |
| "grad_norm": 0.35481859423446044, | |
| "learning_rate": 1.1662391854336263e-06, | |
| "loss": 1.1223, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.957501943508681, | |
| "grad_norm": 0.38700062266195656, | |
| "learning_rate": 1.0983765238831377e-06, | |
| "loss": 1.1501, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 0.9587976159626846, | |
| "grad_norm": 0.3672922868247709, | |
| "learning_rate": 1.032537034620684e-06, | |
| "loss": 1.1419, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.9600932884166883, | |
| "grad_norm": 0.37605980694280633, | |
| "learning_rate": 9.687220644859275e-07, | |
| "loss": 1.133, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 0.9613889608706919, | |
| "grad_norm": 0.36877920673269027, | |
| "learning_rate": 9.069329189041464e-07, | |
| "loss": 1.1608, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.9626846333246956, | |
| "grad_norm": 0.381237649400668, | |
| "learning_rate": 8.471708618595142e-07, | |
| "loss": 1.132, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 0.9639803057786992, | |
| "grad_norm": 1.8685424500994194, | |
| "learning_rate": 7.894371158692627e-07, | |
| "loss": 1.1656, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.9652759782327027, | |
| "grad_norm": 0.38287612615523475, | |
| "learning_rate": 7.337328619586359e-07, | |
| "loss": 1.1992, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.9665716506867064, | |
| "grad_norm": 0.3838324502538139, | |
| "learning_rate": 6.800592396367545e-07, | |
| "loss": 1.1816, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.96786732314071, | |
| "grad_norm": 0.36218594513869495, | |
| "learning_rate": 6.284173468733334e-07, | |
| "loss": 1.1188, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 0.9691629955947136, | |
| "grad_norm": 0.3605898736094485, | |
| "learning_rate": 5.788082400761563e-07, | |
| "loss": 1.1342, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.9704586680487173, | |
| "grad_norm": 0.3819284101383782, | |
| "learning_rate": 5.312329340695143e-07, | |
| "loss": 1.1341, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 0.9717543405027209, | |
| "grad_norm": 0.371840798865179, | |
| "learning_rate": 4.856924020734565e-07, | |
| "loss": 1.1315, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.9730500129567246, | |
| "grad_norm": 0.37381875278150484, | |
| "learning_rate": 4.4218757568380563e-07, | |
| "loss": 1.1129, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 0.9743456854107282, | |
| "grad_norm": 0.37126771253731844, | |
| "learning_rate": 4.007193448532065e-07, | |
| "loss": 1.1666, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.9756413578647318, | |
| "grad_norm": 0.36988126308198593, | |
| "learning_rate": 3.612885578728298e-07, | |
| "loss": 1.1429, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 0.9769370303187355, | |
| "grad_norm": 0.3778202136491613, | |
| "learning_rate": 3.2389602135507457e-07, | |
| "loss": 1.1985, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.9782327027727391, | |
| "grad_norm": 0.3544356950249304, | |
| "learning_rate": 2.885425002170594e-07, | |
| "loss": 1.1469, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.9795283752267426, | |
| "grad_norm": 0.37874103417633515, | |
| "learning_rate": 2.5522871766494595e-07, | |
| "loss": 1.1101, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.9808240476807463, | |
| "grad_norm": 0.3788020452535586, | |
| "learning_rate": 2.2395535517917287e-07, | |
| "loss": 1.1895, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 0.9821197201347499, | |
| "grad_norm": 0.37493085740769366, | |
| "learning_rate": 1.947230525005006e-07, | |
| "loss": 1.1213, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.9834153925887535, | |
| "grad_norm": 0.36932806609876156, | |
| "learning_rate": 1.6753240761693268e-07, | |
| "loss": 1.1141, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 0.9847110650427572, | |
| "grad_norm": 0.3675515005536323, | |
| "learning_rate": 1.4238397675150339e-07, | |
| "loss": 1.1292, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.9860067374967608, | |
| "grad_norm": 0.3595954681947242, | |
| "learning_rate": 1.1927827435084248e-07, | |
| "loss": 1.1044, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 0.9873024099507645, | |
| "grad_norm": 0.3750795386829234, | |
| "learning_rate": 9.821577307470575e-08, | |
| "loss": 1.1441, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.9885980824047681, | |
| "grad_norm": 0.36749997185699007, | |
| "learning_rate": 7.919690378629385e-08, | |
| "loss": 1.171, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 0.9898937548587717, | |
| "grad_norm": 0.3622505687301632, | |
| "learning_rate": 6.222205554339277e-08, | |
| "loss": 1.1675, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.9911894273127754, | |
| "grad_norm": 0.3589951765733612, | |
| "learning_rate": 4.729157559049124e-08, | |
| "loss": 1.1451, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.992485099766779, | |
| "grad_norm": 0.372014869272016, | |
| "learning_rate": 3.440576935164197e-08, | |
| "loss": 1.1383, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.9937807722207825, | |
| "grad_norm": 0.36261244538328924, | |
| "learning_rate": 2.3564900424188906e-08, | |
| "loss": 1.1706, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 0.9950764446747862, | |
| "grad_norm": 0.37064889363490416, | |
| "learning_rate": 1.4769190573393765e-08, | |
| "loss": 1.1344, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.9963721171287898, | |
| "grad_norm": 0.4019334217280328, | |
| "learning_rate": 8.018819727928505e-09, | |
| "loss": 1.19, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 0.9976677895827935, | |
| "grad_norm": 0.3532944069166521, | |
| "learning_rate": 3.313925976156096e-09, | |
| "loss": 1.1704, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.9989634620367971, | |
| "grad_norm": 0.3954932358396543, | |
| "learning_rate": 6.546055633105397e-10, | |
| "loss": 1.0999, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_runtime": 3.6908, | |
| "eval_samples_per_second": 2.709, | |
| "eval_steps_per_second": 0.813, | |
| "step": 3859 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 3859, | |
| "total_flos": 1.3011067827388416e+16, | |
| "train_loss": 1.2484874595598594, | |
| "train_runtime": 22644.1998, | |
| "train_samples_per_second": 2.726, | |
| "train_steps_per_second": 0.17 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3859, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3011067827388416e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |