| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.99960508648606, |
| "eval_steps": 500, |
| "global_step": 15825, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0031593081115235764, |
| "grad_norm": 3.847676639099889, |
| "learning_rate": 6.317119393556539e-08, |
| "loss": 0.5818, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006318616223047153, |
| "grad_norm": 3.8262659782800736, |
| "learning_rate": 1.2634238787113078e-07, |
| "loss": 0.5856, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00947792433457073, |
| "grad_norm": 3.2562040895477886, |
| "learning_rate": 1.8951358180669618e-07, |
| "loss": 0.5757, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.012637232446094306, |
| "grad_norm": 2.3293562431253108, |
| "learning_rate": 2.5268477574226156e-07, |
| "loss": 0.5599, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01579654055761788, |
| "grad_norm": 1.7404444983787009, |
| "learning_rate": 3.158559696778269e-07, |
| "loss": 0.5394, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01895584866914146, |
| "grad_norm": 1.3641917061000175, |
| "learning_rate": 3.7902716361339236e-07, |
| "loss": 0.5113, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.022115156780665033, |
| "grad_norm": 0.9564559310967178, |
| "learning_rate": 4.421983575489577e-07, |
| "loss": 0.4871, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02527446489218861, |
| "grad_norm": 0.7099841750448822, |
| "learning_rate": 5.053695514845231e-07, |
| "loss": 0.4714, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.028433773003712186, |
| "grad_norm": 0.5402837514386732, |
| "learning_rate": 5.685407454200885e-07, |
| "loss": 0.451, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03159308111523576, |
| "grad_norm": 0.4807271971224434, |
| "learning_rate": 6.317119393556538e-07, |
| "loss": 0.4323, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03475238922675934, |
| "grad_norm": 0.34807058040387906, |
| "learning_rate": 6.948831332912193e-07, |
| "loss": 0.4219, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03791169733828292, |
| "grad_norm": 0.3269546213934352, |
| "learning_rate": 7.580543272267847e-07, |
| "loss": 0.4152, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04107100544980649, |
| "grad_norm": 0.29931985054640087, |
| "learning_rate": 8.212255211623501e-07, |
| "loss": 0.4046, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.044230313561330066, |
| "grad_norm": 0.3007912763355078, |
| "learning_rate": 8.843967150979154e-07, |
| "loss": 0.3975, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04738962167285365, |
| "grad_norm": 0.3314878634217047, |
| "learning_rate": 9.475679090334808e-07, |
| "loss": 0.398, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05054892978437722, |
| "grad_norm": 0.3099556109376344, |
| "learning_rate": 1.0107391029690462e-06, |
| "loss": 0.3931, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0537082378959008, |
| "grad_norm": 0.29068208472557255, |
| "learning_rate": 1.0739102969046116e-06, |
| "loss": 0.3858, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05686754600742437, |
| "grad_norm": 0.27910793332859557, |
| "learning_rate": 1.137081490840177e-06, |
| "loss": 0.3808, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06002685411894795, |
| "grad_norm": 0.30546375486352645, |
| "learning_rate": 1.2002526847757423e-06, |
| "loss": 0.3762, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06318616223047152, |
| "grad_norm": 0.3554409408339123, |
| "learning_rate": 1.2634238787113076e-06, |
| "loss": 0.3735, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0663454703419951, |
| "grad_norm": 0.32484090299900514, |
| "learning_rate": 1.3265950726468732e-06, |
| "loss": 0.3674, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.06950477845351868, |
| "grad_norm": 0.3068743877352151, |
| "learning_rate": 1.3897662665824385e-06, |
| "loss": 0.3666, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07266408656504225, |
| "grad_norm": 0.29618802262032184, |
| "learning_rate": 1.4529374605180039e-06, |
| "loss": 0.3618, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07582339467656583, |
| "grad_norm": 0.3148809652314096, |
| "learning_rate": 1.5161086544535694e-06, |
| "loss": 0.3624, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.07898270278808942, |
| "grad_norm": 0.2804644371038647, |
| "learning_rate": 1.5792798483891348e-06, |
| "loss": 0.3613, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08214201089961298, |
| "grad_norm": 0.3309683996287944, |
| "learning_rate": 1.6424510423247001e-06, |
| "loss": 0.3581, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08530131901113656, |
| "grad_norm": 0.29235969381132293, |
| "learning_rate": 1.7056222362602653e-06, |
| "loss": 0.3556, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.08846062712266013, |
| "grad_norm": 0.3059950184586691, |
| "learning_rate": 1.7687934301958308e-06, |
| "loss": 0.3485, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09161993523418371, |
| "grad_norm": 0.3065943367603657, |
| "learning_rate": 1.8319646241313962e-06, |
| "loss": 0.3441, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0947792433457073, |
| "grad_norm": 0.29754666934058893, |
| "learning_rate": 1.8951358180669615e-06, |
| "loss": 0.3486, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.09793855145723086, |
| "grad_norm": 0.31042175851236, |
| "learning_rate": 1.9583070120025267e-06, |
| "loss": 0.3458, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10109785956875444, |
| "grad_norm": 0.28167478495509757, |
| "learning_rate": 2.0214782059380925e-06, |
| "loss": 0.3425, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.10425716768027801, |
| "grad_norm": 0.2937174634980692, |
| "learning_rate": 2.084649399873658e-06, |
| "loss": 0.3409, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1074164757918016, |
| "grad_norm": 0.28351076022747446, |
| "learning_rate": 2.147820593809223e-06, |
| "loss": 0.3377, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11057578390332518, |
| "grad_norm": 0.2873794548808371, |
| "learning_rate": 2.2109917877447885e-06, |
| "loss": 0.3365, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.11373509201484874, |
| "grad_norm": 0.28077300443160785, |
| "learning_rate": 2.274162981680354e-06, |
| "loss": 0.3395, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.11689440012637232, |
| "grad_norm": 0.2895451240097338, |
| "learning_rate": 2.337334175615919e-06, |
| "loss": 0.3356, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1200537082378959, |
| "grad_norm": 0.29469241884731345, |
| "learning_rate": 2.4005053695514845e-06, |
| "loss": 0.3323, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.12321301634941947, |
| "grad_norm": 0.31436693982320113, |
| "learning_rate": 2.4636765634870503e-06, |
| "loss": 0.333, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.12637232446094304, |
| "grad_norm": 0.2882094750389832, |
| "learning_rate": 2.5268477574226152e-06, |
| "loss": 0.3316, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.12953163257246664, |
| "grad_norm": 0.33020346533639344, |
| "learning_rate": 2.590018951358181e-06, |
| "loss": 0.328, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1326909406839902, |
| "grad_norm": 0.3442838133962943, |
| "learning_rate": 2.6531901452937464e-06, |
| "loss": 0.3254, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.13585024879551377, |
| "grad_norm": 0.3032556873766918, |
| "learning_rate": 2.7163613392293113e-06, |
| "loss": 0.3249, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.13900955690703737, |
| "grad_norm": 0.3056937672984876, |
| "learning_rate": 2.779532533164877e-06, |
| "loss": 0.3229, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.14216886501856094, |
| "grad_norm": 0.28744855606538844, |
| "learning_rate": 2.8427037271004424e-06, |
| "loss": 0.3233, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1453281731300845, |
| "grad_norm": 0.31562420003771263, |
| "learning_rate": 2.9058749210360078e-06, |
| "loss": 0.3226, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1484874812416081, |
| "grad_norm": 0.2778274627103786, |
| "learning_rate": 2.969046114971573e-06, |
| "loss": 0.3209, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.15164678935313167, |
| "grad_norm": 0.31527911586537605, |
| "learning_rate": 3.032217308907139e-06, |
| "loss": 0.3233, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.15480609746465523, |
| "grad_norm": 0.28579902930861095, |
| "learning_rate": 3.095388502842704e-06, |
| "loss": 0.3187, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.15796540557617883, |
| "grad_norm": 0.2854450060082136, |
| "learning_rate": 3.1585596967782696e-06, |
| "loss": 0.3164, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1611247136877024, |
| "grad_norm": 0.2783395921293009, |
| "learning_rate": 3.2217308907138345e-06, |
| "loss": 0.3172, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.16428402179922597, |
| "grad_norm": 0.30491989041976003, |
| "learning_rate": 3.2849020846494003e-06, |
| "loss": 0.3129, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.16744332991074953, |
| "grad_norm": 0.32468595503092035, |
| "learning_rate": 3.3480732785849656e-06, |
| "loss": 0.3147, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.17060263802227313, |
| "grad_norm": 0.31416335531159423, |
| "learning_rate": 3.4112444725205306e-06, |
| "loss": 0.3125, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.1737619461337967, |
| "grad_norm": 0.36383194617757575, |
| "learning_rate": 3.4744156664560963e-06, |
| "loss": 0.3138, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.17692125424532026, |
| "grad_norm": 0.29533777762093066, |
| "learning_rate": 3.5375868603916617e-06, |
| "loss": 0.3116, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.18008056235684386, |
| "grad_norm": 0.3493771802079338, |
| "learning_rate": 3.600758054327227e-06, |
| "loss": 0.3135, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.18323987046836743, |
| "grad_norm": 0.2944691289331218, |
| "learning_rate": 3.6639292482627924e-06, |
| "loss": 0.3163, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.186399178579891, |
| "grad_norm": 0.31590918798291073, |
| "learning_rate": 3.727100442198358e-06, |
| "loss": 0.3087, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1895584866914146, |
| "grad_norm": 0.36702711115113235, |
| "learning_rate": 3.790271636133923e-06, |
| "loss": 0.3048, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.19271779480293816, |
| "grad_norm": 0.34337289204312266, |
| "learning_rate": 3.853442830069489e-06, |
| "loss": 0.3097, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.19587710291446173, |
| "grad_norm": 0.3043737272471374, |
| "learning_rate": 3.916614024005053e-06, |
| "loss": 0.3071, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.19903641102598532, |
| "grad_norm": 0.31254100353406794, |
| "learning_rate": 3.9797852179406196e-06, |
| "loss": 0.3035, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2021957191375089, |
| "grad_norm": 0.2865164407577743, |
| "learning_rate": 4.042956411876185e-06, |
| "loss": 0.3047, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.20535502724903246, |
| "grad_norm": 0.29447310088157563, |
| "learning_rate": 4.10612760581175e-06, |
| "loss": 0.3037, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.20851433536055602, |
| "grad_norm": 0.29267707519023506, |
| "learning_rate": 4.169298799747316e-06, |
| "loss": 0.3028, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.21167364347207962, |
| "grad_norm": 0.2963460269610618, |
| "learning_rate": 4.232469993682881e-06, |
| "loss": 0.305, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.2148329515836032, |
| "grad_norm": 0.2978257495744734, |
| "learning_rate": 4.295641187618446e-06, |
| "loss": 0.3004, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.21799225969512676, |
| "grad_norm": 0.2906359775378807, |
| "learning_rate": 4.358812381554012e-06, |
| "loss": 0.3019, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.22115156780665035, |
| "grad_norm": 0.3446901435895929, |
| "learning_rate": 4.421983575489577e-06, |
| "loss": 0.3024, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.22431087591817392, |
| "grad_norm": 0.30756534246023176, |
| "learning_rate": 4.485154769425142e-06, |
| "loss": 0.2959, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2274701840296975, |
| "grad_norm": 0.3155947525143884, |
| "learning_rate": 4.548325963360708e-06, |
| "loss": 0.3005, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.23062949214122108, |
| "grad_norm": 0.2995756375311246, |
| "learning_rate": 4.611497157296273e-06, |
| "loss": 0.2975, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.23378880025274465, |
| "grad_norm": 0.30858061818787486, |
| "learning_rate": 4.674668351231838e-06, |
| "loss": 0.2978, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.23694810836426822, |
| "grad_norm": 0.3338257461588683, |
| "learning_rate": 4.737839545167405e-06, |
| "loss": 0.2933, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2401074164757918, |
| "grad_norm": 0.29022817054284084, |
| "learning_rate": 4.801010739102969e-06, |
| "loss": 0.2972, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.24326672458731538, |
| "grad_norm": 0.2896880604505418, |
| "learning_rate": 4.8641819330385344e-06, |
| "loss": 0.2946, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.24642603269883895, |
| "grad_norm": 0.2847438243168221, |
| "learning_rate": 4.927353126974101e-06, |
| "loss": 0.2969, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.24958534081036254, |
| "grad_norm": 0.3037714021164623, |
| "learning_rate": 4.990524320909665e-06, |
| "loss": 0.2937, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.2527446489218861, |
| "grad_norm": 0.32395568014513065, |
| "learning_rate": 5.0536955148452305e-06, |
| "loss": 0.2939, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2559039570334097, |
| "grad_norm": 0.2957719635763268, |
| "learning_rate": 5.116866708780797e-06, |
| "loss": 0.2932, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.2590632651449333, |
| "grad_norm": 0.3165621516450349, |
| "learning_rate": 5.180037902716362e-06, |
| "loss": 0.2928, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2622225732564568, |
| "grad_norm": 0.3232166339204694, |
| "learning_rate": 5.2432090966519265e-06, |
| "loss": 0.2901, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.2653818813679804, |
| "grad_norm": 0.3040549753465353, |
| "learning_rate": 5.306380290587493e-06, |
| "loss": 0.2881, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.268541189479504, |
| "grad_norm": 0.32781940117167635, |
| "learning_rate": 5.369551484523058e-06, |
| "loss": 0.2908, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.27170049759102755, |
| "grad_norm": 0.3327478795766515, |
| "learning_rate": 5.432722678458623e-06, |
| "loss": 0.2907, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.27485980570255114, |
| "grad_norm": 0.2856004412508293, |
| "learning_rate": 5.495893872394189e-06, |
| "loss": 0.2934, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.27801911381407474, |
| "grad_norm": 0.3604841472498219, |
| "learning_rate": 5.559065066329754e-06, |
| "loss": 0.2855, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.2811784219255983, |
| "grad_norm": 0.32653022277153904, |
| "learning_rate": 5.6222362602653195e-06, |
| "loss": 0.2876, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.28433773003712187, |
| "grad_norm": 0.3123066830281955, |
| "learning_rate": 5.685407454200885e-06, |
| "loss": 0.2887, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.28749703814864547, |
| "grad_norm": 0.31118386648194923, |
| "learning_rate": 5.74857864813645e-06, |
| "loss": 0.2889, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.290656346260169, |
| "grad_norm": 0.29015768543262505, |
| "learning_rate": 5.8117498420720155e-06, |
| "loss": 0.2876, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2938156543716926, |
| "grad_norm": 0.2885072388494909, |
| "learning_rate": 5.874921036007582e-06, |
| "loss": 0.2868, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.2969749624832162, |
| "grad_norm": 0.3282216748098807, |
| "learning_rate": 5.938092229943146e-06, |
| "loss": 0.2877, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.30013427059473974, |
| "grad_norm": 0.29797634077774454, |
| "learning_rate": 6.001263423878712e-06, |
| "loss": 0.2873, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.30329357870626333, |
| "grad_norm": 0.33087996389175034, |
| "learning_rate": 6.064434617814278e-06, |
| "loss": 0.2838, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.30645288681778693, |
| "grad_norm": 0.29661512432744086, |
| "learning_rate": 6.127605811749843e-06, |
| "loss": 0.2877, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.30961219492931047, |
| "grad_norm": 0.31023012973801395, |
| "learning_rate": 6.190777005685408e-06, |
| "loss": 0.2826, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.31277150304083406, |
| "grad_norm": 0.30885706884471026, |
| "learning_rate": 6.253948199620974e-06, |
| "loss": 0.2845, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.31593081115235766, |
| "grad_norm": 0.3540472583639826, |
| "learning_rate": 6.317119393556539e-06, |
| "loss": 0.2809, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3190901192638812, |
| "grad_norm": 0.2730722833734264, |
| "learning_rate": 6.380290587492104e-06, |
| "loss": 0.2787, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3222494273754048, |
| "grad_norm": 0.3320814722084985, |
| "learning_rate": 6.443461781427669e-06, |
| "loss": 0.2813, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.32540873548692834, |
| "grad_norm": 0.290985113799801, |
| "learning_rate": 6.506632975363235e-06, |
| "loss": 0.2782, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.32856804359845193, |
| "grad_norm": 0.321948058443697, |
| "learning_rate": 6.5698041692988006e-06, |
| "loss": 0.2799, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.3317273517099755, |
| "grad_norm": 0.3247951324518689, |
| "learning_rate": 6.632975363234365e-06, |
| "loss": 0.2853, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.33488665982149907, |
| "grad_norm": 0.37466669888496, |
| "learning_rate": 6.696146557169931e-06, |
| "loss": 0.2847, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.33804596793302266, |
| "grad_norm": 0.3158284283569005, |
| "learning_rate": 6.759317751105497e-06, |
| "loss": 0.2793, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.34120527604454626, |
| "grad_norm": 0.2844891421044668, |
| "learning_rate": 6.822488945041061e-06, |
| "loss": 0.2812, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.3443645841560698, |
| "grad_norm": 0.33051991039937395, |
| "learning_rate": 6.885660138976627e-06, |
| "loss": 0.2811, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.3475238922675934, |
| "grad_norm": 0.3186151235138414, |
| "learning_rate": 6.948831332912193e-06, |
| "loss": 0.2851, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.350683200379117, |
| "grad_norm": 0.3123969158631383, |
| "learning_rate": 7.012002526847758e-06, |
| "loss": 0.2798, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.35384250849064053, |
| "grad_norm": 0.3280651223802418, |
| "learning_rate": 7.075173720783323e-06, |
| "loss": 0.2835, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.3570018166021641, |
| "grad_norm": 0.29332806586366567, |
| "learning_rate": 7.138344914718889e-06, |
| "loss": 0.2775, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3601611247136877, |
| "grad_norm": 0.33171088512344676, |
| "learning_rate": 7.201516108654454e-06, |
| "loss": 0.2752, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.36332043282521126, |
| "grad_norm": 0.3000902915772275, |
| "learning_rate": 7.26468730259002e-06, |
| "loss": 0.2807, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.36647974093673485, |
| "grad_norm": 0.2883327293237225, |
| "learning_rate": 7.327858496525585e-06, |
| "loss": 0.2762, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.36963904904825845, |
| "grad_norm": 0.3144189091034534, |
| "learning_rate": 7.39102969046115e-06, |
| "loss": 0.2801, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.372798357159782, |
| "grad_norm": 0.356268108886898, |
| "learning_rate": 7.454200884396716e-06, |
| "loss": 0.2777, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.3759576652713056, |
| "grad_norm": 0.3128810321586775, |
| "learning_rate": 7.517372078332281e-06, |
| "loss": 0.2757, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.3791169733828292, |
| "grad_norm": 0.30434623170055075, |
| "learning_rate": 7.580543272267846e-06, |
| "loss": 0.2784, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3822762814943527, |
| "grad_norm": 0.3388464380231131, |
| "learning_rate": 7.643714466203413e-06, |
| "loss": 0.2784, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.3854355896058763, |
| "grad_norm": 0.3110630401877264, |
| "learning_rate": 7.706885660138978e-06, |
| "loss": 0.2743, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.3885948977173999, |
| "grad_norm": 0.2771106506778223, |
| "learning_rate": 7.770056854074542e-06, |
| "loss": 0.2757, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.39175420582892345, |
| "grad_norm": 0.3175411734943708, |
| "learning_rate": 7.833228048010107e-06, |
| "loss": 0.2762, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.39491351394044705, |
| "grad_norm": 0.3072276379244834, |
| "learning_rate": 7.896399241945673e-06, |
| "loss": 0.2757, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.39807282205197064, |
| "grad_norm": 0.31787735230000674, |
| "learning_rate": 7.959570435881239e-06, |
| "loss": 0.2716, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4012321301634942, |
| "grad_norm": 0.32573848304334413, |
| "learning_rate": 8.022741629816804e-06, |
| "loss": 0.2718, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4043914382750178, |
| "grad_norm": 0.31968318506791704, |
| "learning_rate": 8.08591282375237e-06, |
| "loss": 0.2753, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4075507463865414, |
| "grad_norm": 0.3038009939073212, |
| "learning_rate": 8.149084017687934e-06, |
| "loss": 0.2709, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.4107100544980649, |
| "grad_norm": 0.32085469500420516, |
| "learning_rate": 8.2122552116235e-06, |
| "loss": 0.2707, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4138693626095885, |
| "grad_norm": 0.3065889908229096, |
| "learning_rate": 8.275426405559067e-06, |
| "loss": 0.2768, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.41702867072111205, |
| "grad_norm": 0.31763603457220624, |
| "learning_rate": 8.338597599494631e-06, |
| "loss": 0.2709, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.42018797883263564, |
| "grad_norm": 0.3109152063857626, |
| "learning_rate": 8.401768793430196e-06, |
| "loss": 0.2739, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.42334728694415924, |
| "grad_norm": 0.29181104171061434, |
| "learning_rate": 8.464939987365762e-06, |
| "loss": 0.2686, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.4265065950556828, |
| "grad_norm": 0.2935442340016787, |
| "learning_rate": 8.528111181301328e-06, |
| "loss": 0.2707, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.4296659031672064, |
| "grad_norm": 0.2906035979144556, |
| "learning_rate": 8.591282375236893e-06, |
| "loss": 0.2731, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.43282521127872997, |
| "grad_norm": 0.29758936608537967, |
| "learning_rate": 8.654453569172459e-06, |
| "loss": 0.2709, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.4359845193902535, |
| "grad_norm": 0.33154204605407617, |
| "learning_rate": 8.717624763108023e-06, |
| "loss": 0.2707, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.4391438275017771, |
| "grad_norm": 0.33339797215462064, |
| "learning_rate": 8.780795957043588e-06, |
| "loss": 0.2713, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.4423031356133007, |
| "grad_norm": 0.3155178562410378, |
| "learning_rate": 8.843967150979154e-06, |
| "loss": 0.2675, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.44546244372482424, |
| "grad_norm": 0.3265426814247614, |
| "learning_rate": 8.90713834491472e-06, |
| "loss": 0.2683, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.44862175183634784, |
| "grad_norm": 0.36971141677875463, |
| "learning_rate": 8.970309538850285e-06, |
| "loss": 0.2677, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.45178105994787143, |
| "grad_norm": 0.30518969540463764, |
| "learning_rate": 9.033480732785851e-06, |
| "loss": 0.2688, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.454940368059395, |
| "grad_norm": 0.34536630526318685, |
| "learning_rate": 9.096651926721415e-06, |
| "loss": 0.2673, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.45809967617091857, |
| "grad_norm": 0.35749309238694066, |
| "learning_rate": 9.159823120656982e-06, |
| "loss": 0.2691, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.46125898428244216, |
| "grad_norm": 0.31258023613884145, |
| "learning_rate": 9.222994314592546e-06, |
| "loss": 0.2664, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.4644182923939657, |
| "grad_norm": 0.3450789196077563, |
| "learning_rate": 9.286165508528112e-06, |
| "loss": 0.268, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.4675776005054893, |
| "grad_norm": 0.29121813500205246, |
| "learning_rate": 9.349336702463677e-06, |
| "loss": 0.2677, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.4707369086170129, |
| "grad_norm": 0.3220463754226227, |
| "learning_rate": 9.412507896399243e-06, |
| "loss": 0.2667, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.47389621672853643, |
| "grad_norm": 0.33704239601038527, |
| "learning_rate": 9.47567909033481e-06, |
| "loss": 0.2646, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.47705552484006003, |
| "grad_norm": 0.2863325950552584, |
| "learning_rate": 9.538850284270374e-06, |
| "loss": 0.2705, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.4802148329515836, |
| "grad_norm": 0.31752841534155907, |
| "learning_rate": 9.602021478205938e-06, |
| "loss": 0.2664, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.48337414106310717, |
| "grad_norm": 0.32880306966393713, |
| "learning_rate": 9.665192672141504e-06, |
| "loss": 0.2631, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.48653344917463076, |
| "grad_norm": 0.31843129282620164, |
| "learning_rate": 9.728363866077069e-06, |
| "loss": 0.2625, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.48969275728615436, |
| "grad_norm": 0.30470755245172276, |
| "learning_rate": 9.791535060012635e-06, |
| "loss": 0.2666, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.4928520653976779, |
| "grad_norm": 0.33123646966581777, |
| "learning_rate": 9.854706253948201e-06, |
| "loss": 0.2646, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.4960113735092015, |
| "grad_norm": 0.28677001614790365, |
| "learning_rate": 9.917877447883766e-06, |
| "loss": 0.2622, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.4991706816207251, |
| "grad_norm": 0.324282072097384, |
| "learning_rate": 9.98104864181933e-06, |
| "loss": 0.2678, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5023299897322486, |
| "grad_norm": 0.3255697445472025, |
| "learning_rate": 9.999994039347758e-06, |
| "loss": 0.2637, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.5054892978437722, |
| "grad_norm": 0.2933750585789845, |
| "learning_rate": 9.999964844350574e-06, |
| "loss": 0.2649, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5086486059552958, |
| "grad_norm": 0.33377765453076996, |
| "learning_rate": 9.999911320336655e-06, |
| "loss": 0.267, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5118079140668194, |
| "grad_norm": 0.304237717093902, |
| "learning_rate": 9.999833467566438e-06, |
| "loss": 0.2633, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5149672221783429, |
| "grad_norm": 0.3058481570433111, |
| "learning_rate": 9.999731286418741e-06, |
| "loss": 0.262, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.5181265302898665, |
| "grad_norm": 0.3392309580496954, |
| "learning_rate": 9.999604777390763e-06, |
| "loss": 0.2593, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.5212858384013901, |
| "grad_norm": 0.2895018374347362, |
| "learning_rate": 9.999453941098077e-06, |
| "loss": 0.2625, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5244451465129136, |
| "grad_norm": 0.2912777670223621, |
| "learning_rate": 9.999278778274627e-06, |
| "loss": 0.266, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5276044546244373, |
| "grad_norm": 0.3529258059433674, |
| "learning_rate": 9.999079289772724e-06, |
| "loss": 0.2619, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.5307637627359608, |
| "grad_norm": 0.2738135177871275, |
| "learning_rate": 9.99885547656305e-06, |
| "loss": 0.2618, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5339230708474844, |
| "grad_norm": 0.2692997188904244, |
| "learning_rate": 9.998607339734643e-06, |
| "loss": 0.2606, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.537082378959008, |
| "grad_norm": 0.2949366840504707, |
| "learning_rate": 9.998334880494898e-06, |
| "loss": 0.2612, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5402416870705316, |
| "grad_norm": 0.3057325239685516, |
| "learning_rate": 9.998038100169554e-06, |
| "loss": 0.261, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.5434009951820551, |
| "grad_norm": 0.27124805299961813, |
| "learning_rate": 9.997717000202696e-06, |
| "loss": 0.2598, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.5465603032935787, |
| "grad_norm": 0.30051977914101224, |
| "learning_rate": 9.997371582156747e-06, |
| "loss": 0.2602, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.5497196114051023, |
| "grad_norm": 0.3075193191916608, |
| "learning_rate": 9.997001847712456e-06, |
| "loss": 0.2601, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.5528789195166258, |
| "grad_norm": 0.29754589174948665, |
| "learning_rate": 9.996607798668887e-06, |
| "loss": 0.2592, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5560382276281495, |
| "grad_norm": 0.33700900362331376, |
| "learning_rate": 9.99618943694342e-06, |
| "loss": 0.262, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.559197535739673, |
| "grad_norm": 0.3265608460452239, |
| "learning_rate": 9.995746764571736e-06, |
| "loss": 0.259, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.5623568438511966, |
| "grad_norm": 0.31549240271112217, |
| "learning_rate": 9.995279783707805e-06, |
| "loss": 0.2607, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.5655161519627202, |
| "grad_norm": 0.3586012625330008, |
| "learning_rate": 9.994788496623884e-06, |
| "loss": 0.2572, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.5686754600742437, |
| "grad_norm": 0.3056154836660224, |
| "learning_rate": 9.994272905710491e-06, |
| "loss": 0.2526, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5718347681857673, |
| "grad_norm": 0.2613411851814494, |
| "learning_rate": 9.993733013476412e-06, |
| "loss": 0.2561, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.5749940762972909, |
| "grad_norm": 0.3254616196951068, |
| "learning_rate": 9.993168822548672e-06, |
| "loss": 0.257, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.5781533844088145, |
| "grad_norm": 0.3020785656622962, |
| "learning_rate": 9.992580335672535e-06, |
| "loss": 0.2557, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.581312692520338, |
| "grad_norm": 0.26993214887498285, |
| "learning_rate": 9.99196755571148e-06, |
| "loss": 0.2536, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.5844720006318617, |
| "grad_norm": 0.2639927502536523, |
| "learning_rate": 9.991330485647195e-06, |
| "loss": 0.26, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.5876313087433852, |
| "grad_norm": 0.30065294121954683, |
| "learning_rate": 9.990669128579562e-06, |
| "loss": 0.2605, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.5907906168549087, |
| "grad_norm": 0.30583938194208843, |
| "learning_rate": 9.989983487726634e-06, |
| "loss": 0.2563, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.5939499249664324, |
| "grad_norm": 0.2864579077847969, |
| "learning_rate": 9.989273566424629e-06, |
| "loss": 0.2606, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.5971092330779559, |
| "grad_norm": 0.3026768306473177, |
| "learning_rate": 9.98853936812791e-06, |
| "loss": 0.258, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.6002685411894795, |
| "grad_norm": 0.29314955217944727, |
| "learning_rate": 9.987780896408966e-06, |
| "loss": 0.2589, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6034278493010031, |
| "grad_norm": 0.31058695039428386, |
| "learning_rate": 9.986998154958395e-06, |
| "loss": 0.253, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.6065871574125267, |
| "grad_norm": 0.2845944676061047, |
| "learning_rate": 9.986191147584893e-06, |
| "loss": 0.2546, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.6097464655240502, |
| "grad_norm": 0.29933495312410735, |
| "learning_rate": 9.985359878215224e-06, |
| "loss": 0.2552, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.6129057736355739, |
| "grad_norm": 0.2759459833479636, |
| "learning_rate": 9.984504350894213e-06, |
| "loss": 0.2574, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.6160650817470974, |
| "grad_norm": 0.3066829895649084, |
| "learning_rate": 9.983624569784714e-06, |
| "loss": 0.2553, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6192243898586209, |
| "grad_norm": 0.2690944667845755, |
| "learning_rate": 9.982720539167601e-06, |
| "loss": 0.2568, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.6223836979701446, |
| "grad_norm": 0.2946036416755621, |
| "learning_rate": 9.981792263441739e-06, |
| "loss": 0.2543, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.6255430060816681, |
| "grad_norm": 0.2747932187325911, |
| "learning_rate": 9.980839747123967e-06, |
| "loss": 0.2557, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.6287023141931917, |
| "grad_norm": 0.2851964847837964, |
| "learning_rate": 9.979862994849074e-06, |
| "loss": 0.2541, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.6318616223047153, |
| "grad_norm": 0.31890735265964804, |
| "learning_rate": 9.978862011369779e-06, |
| "loss": 0.2558, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6350209304162389, |
| "grad_norm": 0.28038726746988707, |
| "learning_rate": 9.977836801556705e-06, |
| "loss": 0.2538, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.6381802385277624, |
| "grad_norm": 0.295646403302159, |
| "learning_rate": 9.976787370398355e-06, |
| "loss": 0.2546, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.641339546639286, |
| "grad_norm": 0.2902155436032145, |
| "learning_rate": 9.975713723001093e-06, |
| "loss": 0.251, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.6444988547508096, |
| "grad_norm": 0.2828393586793592, |
| "learning_rate": 9.974615864589112e-06, |
| "loss": 0.2559, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.6476581628623331, |
| "grad_norm": 0.28577077749952917, |
| "learning_rate": 9.97349380050441e-06, |
| "loss": 0.2531, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.6508174709738567, |
| "grad_norm": 0.29564024961232643, |
| "learning_rate": 9.972347536206772e-06, |
| "loss": 0.2506, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.6539767790853803, |
| "grad_norm": 0.33183704608229764, |
| "learning_rate": 9.971177077273732e-06, |
| "loss": 0.2534, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.6571360871969039, |
| "grad_norm": 0.28977544469285044, |
| "learning_rate": 9.969982429400556e-06, |
| "loss": 0.2537, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.6602953953084274, |
| "grad_norm": 0.29923378551865065, |
| "learning_rate": 9.968763598400202e-06, |
| "loss": 0.2569, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.663454703419951, |
| "grad_norm": 0.3012488805857487, |
| "learning_rate": 9.967520590203305e-06, |
| "loss": 0.2509, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.6666140115314746, |
| "grad_norm": 0.2916467483906387, |
| "learning_rate": 9.966253410858145e-06, |
| "loss": 0.2551, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.6697733196429981, |
| "grad_norm": 0.27652645594826225, |
| "learning_rate": 9.964962066530604e-06, |
| "loss": 0.2515, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.6729326277545218, |
| "grad_norm": 0.27562185290008373, |
| "learning_rate": 9.963646563504158e-06, |
| "loss": 0.2544, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.6760919358660453, |
| "grad_norm": 0.25732340414313354, |
| "learning_rate": 9.962306908179833e-06, |
| "loss": 0.2515, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.6792512439775689, |
| "grad_norm": 0.26515843698964003, |
| "learning_rate": 9.96094310707617e-06, |
| "loss": 0.2494, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.6824105520890925, |
| "grad_norm": 0.28168758705492475, |
| "learning_rate": 9.959555166829204e-06, |
| "loss": 0.2494, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.685569860200616, |
| "grad_norm": 0.24631536243430951, |
| "learning_rate": 9.95814309419243e-06, |
| "loss": 0.2519, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.6887291683121396, |
| "grad_norm": 0.26426587738324664, |
| "learning_rate": 9.956706896036762e-06, |
| "loss": 0.2533, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.6918884764236632, |
| "grad_norm": 0.24898568379039823, |
| "learning_rate": 9.955246579350505e-06, |
| "loss": 0.2491, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.6950477845351868, |
| "grad_norm": 0.30540369742862966, |
| "learning_rate": 9.953762151239327e-06, |
| "loss": 0.2478, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6982070926467103, |
| "grad_norm": 0.2709499727026948, |
| "learning_rate": 9.952253618926212e-06, |
| "loss": 0.2515, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.701366400758234, |
| "grad_norm": 0.3003234996003297, |
| "learning_rate": 9.95072098975143e-06, |
| "loss": 0.2541, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.7045257088697575, |
| "grad_norm": 0.2736168957433189, |
| "learning_rate": 9.949164271172512e-06, |
| "loss": 0.2499, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.7076850169812811, |
| "grad_norm": 0.2651616205282266, |
| "learning_rate": 9.947583470764193e-06, |
| "loss": 0.2506, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.7108443250928047, |
| "grad_norm": 0.2650203207221141, |
| "learning_rate": 9.945978596218391e-06, |
| "loss": 0.2488, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7140036332043282, |
| "grad_norm": 0.31633222399887706, |
| "learning_rate": 9.944349655344168e-06, |
| "loss": 0.2504, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.7171629413158518, |
| "grad_norm": 0.3450314977108967, |
| "learning_rate": 9.942696656067683e-06, |
| "loss": 0.2487, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.7203222494273754, |
| "grad_norm": 0.2676203266167299, |
| "learning_rate": 9.941019606432163e-06, |
| "loss": 0.2515, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.723481557538899, |
| "grad_norm": 0.27501120919754213, |
| "learning_rate": 9.93931851459786e-06, |
| "loss": 0.2472, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.7266408656504225, |
| "grad_norm": 0.2721444766021836, |
| "learning_rate": 9.937593388842008e-06, |
| "loss": 0.2484, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7298001737619462, |
| "grad_norm": 0.3055396538597429, |
| "learning_rate": 9.935844237558792e-06, |
| "loss": 0.2491, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.7329594818734697, |
| "grad_norm": 0.2687664962558202, |
| "learning_rate": 9.934071069259295e-06, |
| "loss": 0.2511, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.7361187899849932, |
| "grad_norm": 0.26485303698836093, |
| "learning_rate": 9.932273892571467e-06, |
| "loss": 0.2493, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.7392780980965169, |
| "grad_norm": 0.27030949688183054, |
| "learning_rate": 9.930452716240077e-06, |
| "loss": 0.2465, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.7424374062080404, |
| "grad_norm": 0.26582715073120317, |
| "learning_rate": 9.928607549126677e-06, |
| "loss": 0.2492, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.745596714319564, |
| "grad_norm": 0.26161071455089063, |
| "learning_rate": 9.926738400209546e-06, |
| "loss": 0.2473, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.7487560224310876, |
| "grad_norm": 0.27281673381922744, |
| "learning_rate": 9.924845278583661e-06, |
| "loss": 0.2461, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.7519153305426112, |
| "grad_norm": 0.25026144589450994, |
| "learning_rate": 9.922928193460644e-06, |
| "loss": 0.2447, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.7550746386541347, |
| "grad_norm": 0.2928912991449991, |
| "learning_rate": 9.920987154168719e-06, |
| "loss": 0.2461, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.7582339467656584, |
| "grad_norm": 0.2850696882452383, |
| "learning_rate": 9.919022170152668e-06, |
| "loss": 0.2499, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.7613932548771819, |
| "grad_norm": 0.30443484331190923, |
| "learning_rate": 9.917033250973786e-06, |
| "loss": 0.2493, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.7645525629887054, |
| "grad_norm": 0.3212209432208665, |
| "learning_rate": 9.915020406309828e-06, |
| "loss": 0.2491, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.7677118711002291, |
| "grad_norm": 0.28504423491604935, |
| "learning_rate": 9.912983645954973e-06, |
| "loss": 0.2474, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.7708711792117526, |
| "grad_norm": 0.2878869891714329, |
| "learning_rate": 9.910922979819762e-06, |
| "loss": 0.2492, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.7740304873232762, |
| "grad_norm": 0.2771202754746543, |
| "learning_rate": 9.908838417931062e-06, |
| "loss": 0.2472, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.7771897954347998, |
| "grad_norm": 0.31577544480370157, |
| "learning_rate": 9.906729970432014e-06, |
| "loss": 0.249, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.7803491035463234, |
| "grad_norm": 0.3082921227301929, |
| "learning_rate": 9.904597647581982e-06, |
| "loss": 0.2468, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.7835084116578469, |
| "grad_norm": 0.2562560339656447, |
| "learning_rate": 9.9024414597565e-06, |
| "loss": 0.2495, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.7866677197693706, |
| "grad_norm": 0.257309029607144, |
| "learning_rate": 9.90026141744723e-06, |
| "loss": 0.2474, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.7898270278808941, |
| "grad_norm": 0.2512470913827528, |
| "learning_rate": 9.898057531261904e-06, |
| "loss": 0.2472, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7929863359924176, |
| "grad_norm": 0.255003256149547, |
| "learning_rate": 9.89582981192427e-06, |
| "loss": 0.2443, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.7961456441039413, |
| "grad_norm": 0.24903458333429462, |
| "learning_rate": 9.893578270274054e-06, |
| "loss": 0.2473, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.7993049522154648, |
| "grad_norm": 0.25472580330098815, |
| "learning_rate": 9.891302917266886e-06, |
| "loss": 0.2501, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.8024642603269884, |
| "grad_norm": 0.2876016304280073, |
| "learning_rate": 9.889003763974272e-06, |
| "loss": 0.248, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.805623568438512, |
| "grad_norm": 0.26547023285812826, |
| "learning_rate": 9.886680821583512e-06, |
| "loss": 0.2462, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.8087828765500356, |
| "grad_norm": 0.2541356171021246, |
| "learning_rate": 9.884334101397666e-06, |
| "loss": 0.2481, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.8119421846615591, |
| "grad_norm": 0.27501124097499335, |
| "learning_rate": 9.881963614835499e-06, |
| "loss": 0.2417, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.8151014927730827, |
| "grad_norm": 0.24719251381186472, |
| "learning_rate": 9.879569373431408e-06, |
| "loss": 0.2466, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.8182608008846063, |
| "grad_norm": 0.3030702344650738, |
| "learning_rate": 9.877151388835384e-06, |
| "loss": 0.2472, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.8214201089961298, |
| "grad_norm": 0.2516088806018057, |
| "learning_rate": 9.87470967281295e-06, |
| "loss": 0.2448, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8245794171076535, |
| "grad_norm": 0.27693954241387997, |
| "learning_rate": 9.872244237245096e-06, |
| "loss": 0.2453, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.827738725219177, |
| "grad_norm": 0.25236801181159096, |
| "learning_rate": 9.869755094128234e-06, |
| "loss": 0.2444, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.8308980333307006, |
| "grad_norm": 0.2511412465708672, |
| "learning_rate": 9.867242255574127e-06, |
| "loss": 0.2459, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.8340573414422241, |
| "grad_norm": 0.28252199150173957, |
| "learning_rate": 9.864705733809842e-06, |
| "loss": 0.245, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.8372166495537477, |
| "grad_norm": 0.25900018707343897, |
| "learning_rate": 9.862145541177681e-06, |
| "loss": 0.2434, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.8403759576652713, |
| "grad_norm": 0.24962743931418693, |
| "learning_rate": 9.859561690135125e-06, |
| "loss": 0.2461, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.8435352657767948, |
| "grad_norm": 0.28196557811696227, |
| "learning_rate": 9.856954193254773e-06, |
| "loss": 0.2475, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.8466945738883185, |
| "grad_norm": 0.26537277820682614, |
| "learning_rate": 9.854323063224282e-06, |
| "loss": 0.2451, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.849853881999842, |
| "grad_norm": 0.2925684725032324, |
| "learning_rate": 9.851668312846303e-06, |
| "loss": 0.2453, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.8530131901113656, |
| "grad_norm": 0.31194880176939827, |
| "learning_rate": 9.848989955038422e-06, |
| "loss": 0.2446, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8561724982228892, |
| "grad_norm": 0.26907966931847127, |
| "learning_rate": 9.84628800283309e-06, |
| "loss": 0.2416, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.8593318063344128, |
| "grad_norm": 0.2855876195388184, |
| "learning_rate": 9.843562469377568e-06, |
| "loss": 0.2435, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.8624911144459363, |
| "grad_norm": 0.26929494670820325, |
| "learning_rate": 9.84081336793386e-06, |
| "loss": 0.246, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.8656504225574599, |
| "grad_norm": 0.2560753102085232, |
| "learning_rate": 9.838040711878648e-06, |
| "loss": 0.2423, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.8688097306689835, |
| "grad_norm": 0.2726015414676623, |
| "learning_rate": 9.835244514703223e-06, |
| "loss": 0.2427, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.871969038780507, |
| "grad_norm": 0.24937962474701536, |
| "learning_rate": 9.83242479001343e-06, |
| "loss": 0.2439, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.8751283468920307, |
| "grad_norm": 0.2758063411552256, |
| "learning_rate": 9.82958155152959e-06, |
| "loss": 0.2448, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.8782876550035542, |
| "grad_norm": 0.2905878430819007, |
| "learning_rate": 9.826714813086439e-06, |
| "loss": 0.2412, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.8814469631150778, |
| "grad_norm": 0.2558643463285868, |
| "learning_rate": 9.82382458863306e-06, |
| "loss": 0.2406, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.8846062712266014, |
| "grad_norm": 0.2642935160526405, |
| "learning_rate": 9.820910892232816e-06, |
| "loss": 0.2444, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.8877655793381249, |
| "grad_norm": 0.2592764595190498, |
| "learning_rate": 9.817973738063283e-06, |
| "loss": 0.2386, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.8909248874496485, |
| "grad_norm": 0.2507512507039411, |
| "learning_rate": 9.815013140416171e-06, |
| "loss": 0.246, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.8940841955611721, |
| "grad_norm": 0.25908992057608105, |
| "learning_rate": 9.812029113697271e-06, |
| "loss": 0.2395, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.8972435036726957, |
| "grad_norm": 0.2612092586416161, |
| "learning_rate": 9.809021672426371e-06, |
| "loss": 0.24, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.9004028117842192, |
| "grad_norm": 0.2673187039088244, |
| "learning_rate": 9.805990831237194e-06, |
| "loss": 0.2444, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.9035621198957429, |
| "grad_norm": 0.2437036853392356, |
| "learning_rate": 9.802936604877316e-06, |
| "loss": 0.241, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.9067214280072664, |
| "grad_norm": 0.2825822044337206, |
| "learning_rate": 9.799859008208112e-06, |
| "loss": 0.2419, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.90988073611879, |
| "grad_norm": 0.28354580397363727, |
| "learning_rate": 9.796758056204662e-06, |
| "loss": 0.2427, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.9130400442303136, |
| "grad_norm": 0.24849879403822714, |
| "learning_rate": 9.7936337639557e-06, |
| "loss": 0.2407, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.9161993523418371, |
| "grad_norm": 0.26017599732454766, |
| "learning_rate": 9.790486146663522e-06, |
| "loss": 0.2403, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9193586604533607, |
| "grad_norm": 0.2718913199408004, |
| "learning_rate": 9.78731521964392e-06, |
| "loss": 0.2428, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.9225179685648843, |
| "grad_norm": 0.23612120956344837, |
| "learning_rate": 9.784120998326115e-06, |
| "loss": 0.2401, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.9256772766764079, |
| "grad_norm": 0.24629353188155112, |
| "learning_rate": 9.780903498252665e-06, |
| "loss": 0.2392, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.9288365847879314, |
| "grad_norm": 0.2459160847664615, |
| "learning_rate": 9.777662735079406e-06, |
| "loss": 0.2404, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.9319958928994551, |
| "grad_norm": 0.2475965886432302, |
| "learning_rate": 9.77439872457536e-06, |
| "loss": 0.2412, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.9351552010109786, |
| "grad_norm": 0.28786836243759434, |
| "learning_rate": 9.771111482622677e-06, |
| "loss": 0.2408, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.9383145091225021, |
| "grad_norm": 0.2556612314956361, |
| "learning_rate": 9.76780102521654e-06, |
| "loss": 0.2395, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.9414738172340258, |
| "grad_norm": 0.26540547133463965, |
| "learning_rate": 9.764467368465098e-06, |
| "loss": 0.2408, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.9446331253455493, |
| "grad_norm": 0.27099661447259094, |
| "learning_rate": 9.761110528589382e-06, |
| "loss": 0.2411, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.9477924334570729, |
| "grad_norm": 0.2665281174710895, |
| "learning_rate": 9.75773052192323e-06, |
| "loss": 0.2411, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9509517415685965, |
| "grad_norm": 0.27363802003153875, |
| "learning_rate": 9.754327364913208e-06, |
| "loss": 0.2378, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.9541110496801201, |
| "grad_norm": 0.2319637502396465, |
| "learning_rate": 9.75090107411852e-06, |
| "loss": 0.2423, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.9572703577916436, |
| "grad_norm": 0.24790838457350073, |
| "learning_rate": 9.747451666210946e-06, |
| "loss": 0.2418, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.9604296659031673, |
| "grad_norm": 0.28516778958623945, |
| "learning_rate": 9.743979157974739e-06, |
| "loss": 0.2416, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.9635889740146908, |
| "grad_norm": 0.24493900212151187, |
| "learning_rate": 9.740483566306565e-06, |
| "loss": 0.2398, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.9667482821262143, |
| "grad_norm": 0.2873027578326869, |
| "learning_rate": 9.736964908215402e-06, |
| "loss": 0.2396, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.969907590237738, |
| "grad_norm": 0.24814910114506777, |
| "learning_rate": 9.733423200822469e-06, |
| "loss": 0.2391, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.9730668983492615, |
| "grad_norm": 0.24906741016474904, |
| "learning_rate": 9.729858461361142e-06, |
| "loss": 0.242, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.9762262064607851, |
| "grad_norm": 0.2826980721069527, |
| "learning_rate": 9.726270707176859e-06, |
| "loss": 0.2399, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.9793855145723087, |
| "grad_norm": 0.26392318924970115, |
| "learning_rate": 9.722659955727055e-06, |
| "loss": 0.2395, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.9825448226838323, |
| "grad_norm": 0.2855609937725699, |
| "learning_rate": 9.719026224581054e-06, |
| "loss": 0.2379, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.9857041307953558, |
| "grad_norm": 0.2502787654544348, |
| "learning_rate": 9.715369531420006e-06, |
| "loss": 0.2394, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.9888634389068794, |
| "grad_norm": 0.23500842887120083, |
| "learning_rate": 9.711689894036785e-06, |
| "loss": 0.2366, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.992022747018403, |
| "grad_norm": 0.24586324530930273, |
| "learning_rate": 9.707987330335906e-06, |
| "loss": 0.2378, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.9951820551299265, |
| "grad_norm": 0.23967550366976703, |
| "learning_rate": 9.704261858333445e-06, |
| "loss": 0.2388, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.9983413632414502, |
| "grad_norm": 0.2725545852571603, |
| "learning_rate": 9.700513496156945e-06, |
| "loss": 0.2378, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.0012637232446093, |
| "grad_norm": 0.25488826469533976, |
| "learning_rate": 9.696742262045324e-06, |
| "loss": 0.2171, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.004423031356133, |
| "grad_norm": 0.2434423686192198, |
| "learning_rate": 9.692948174348798e-06, |
| "loss": 0.2256, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.0075823394676566, |
| "grad_norm": 0.29889977976697246, |
| "learning_rate": 9.689131251528778e-06, |
| "loss": 0.2249, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.0107416475791802, |
| "grad_norm": 0.2533307535141922, |
| "learning_rate": 9.685291512157793e-06, |
| "loss": 0.2265, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.0139009556907037, |
| "grad_norm": 0.2663724197725429, |
| "learning_rate": 9.68142897491939e-06, |
| "loss": 0.226, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.0170602638022272, |
| "grad_norm": 0.2791907738011004, |
| "learning_rate": 9.677543658608047e-06, |
| "loss": 0.2262, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.0202195719137508, |
| "grad_norm": 0.23622696465246065, |
| "learning_rate": 9.673635582129084e-06, |
| "loss": 0.2222, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.0233788800252746, |
| "grad_norm": 0.28281839802186026, |
| "learning_rate": 9.669704764498564e-06, |
| "loss": 0.2246, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.026538188136798, |
| "grad_norm": 0.26869842180643333, |
| "learning_rate": 9.66575122484321e-06, |
| "loss": 0.2249, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.0296974962483216, |
| "grad_norm": 0.26529568216979593, |
| "learning_rate": 9.661774982400301e-06, |
| "loss": 0.223, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.0328568043598452, |
| "grad_norm": 0.2453451964854557, |
| "learning_rate": 9.65777605651759e-06, |
| "loss": 0.2238, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.0360161124713687, |
| "grad_norm": 0.24227112445437904, |
| "learning_rate": 9.653754466653195e-06, |
| "loss": 0.222, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.0391754205828923, |
| "grad_norm": 0.26279583121049077, |
| "learning_rate": 9.649710232375526e-06, |
| "loss": 0.2236, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.042334728694416, |
| "grad_norm": 0.31955584307225327, |
| "learning_rate": 9.645643373363166e-06, |
| "loss": 0.2229, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.0454940368059396, |
| "grad_norm": 0.257107141717547, |
| "learning_rate": 9.64155390940479e-06, |
| "loss": 0.2256, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.048653344917463, |
| "grad_norm": 0.28510160571621773, |
| "learning_rate": 9.637441860399065e-06, |
| "loss": 0.2243, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.0518126530289866, |
| "grad_norm": 0.24137541551602262, |
| "learning_rate": 9.633307246354558e-06, |
| "loss": 0.2237, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.0549719611405102, |
| "grad_norm": 0.26654943800407926, |
| "learning_rate": 9.629150087389625e-06, |
| "loss": 0.2253, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.0581312692520337, |
| "grad_norm": 0.24362961602020153, |
| "learning_rate": 9.624970403732328e-06, |
| "loss": 0.2291, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.0612905773635575, |
| "grad_norm": 0.24350680136934802, |
| "learning_rate": 9.620768215720327e-06, |
| "loss": 0.2229, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.064449885475081, |
| "grad_norm": 0.2410847673171125, |
| "learning_rate": 9.61654354380079e-06, |
| "loss": 0.2291, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.0676091935866046, |
| "grad_norm": 0.24859633294553296, |
| "learning_rate": 9.612296408530279e-06, |
| "loss": 0.224, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.070768501698128, |
| "grad_norm": 0.33635244899144884, |
| "learning_rate": 9.608026830574666e-06, |
| "loss": 0.2219, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.0739278098096516, |
| "grad_norm": 0.288768968554062, |
| "learning_rate": 9.603734830709029e-06, |
| "loss": 0.2252, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.0770871179211752, |
| "grad_norm": 0.2697580145809562, |
| "learning_rate": 9.599420429817534e-06, |
| "loss": 0.2234, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.080246426032699, |
| "grad_norm": 0.2520287953499914, |
| "learning_rate": 9.595083648893361e-06, |
| "loss": 0.2218, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.0834057341442225, |
| "grad_norm": 0.2538820882514716, |
| "learning_rate": 9.59072450903858e-06, |
| "loss": 0.2245, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.086565042255746, |
| "grad_norm": 0.24006555139584898, |
| "learning_rate": 9.586343031464056e-06, |
| "loss": 0.2245, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.0897243503672696, |
| "grad_norm": 0.24296965136255919, |
| "learning_rate": 9.581939237489347e-06, |
| "loss": 0.2227, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.092883658478793, |
| "grad_norm": 0.2641190618925988, |
| "learning_rate": 9.577513148542601e-06, |
| "loss": 0.2224, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.0960429665903166, |
| "grad_norm": 0.22534300284603093, |
| "learning_rate": 9.573064786160447e-06, |
| "loss": 0.2265, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.0992022747018404, |
| "grad_norm": 0.24227259587968128, |
| "learning_rate": 9.568594171987894e-06, |
| "loss": 0.2269, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.102361582813364, |
| "grad_norm": 0.24660324114104168, |
| "learning_rate": 9.564101327778223e-06, |
| "loss": 0.2252, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.1055208909248875, |
| "grad_norm": 0.2727222419616193, |
| "learning_rate": 9.559586275392887e-06, |
| "loss": 0.2222, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.108680199036411, |
| "grad_norm": 0.23732734292357127, |
| "learning_rate": 9.555049036801394e-06, |
| "loss": 0.2251, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.1118395071479346, |
| "grad_norm": 0.2499998501537187, |
| "learning_rate": 9.550489634081213e-06, |
| "loss": 0.2235, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.114998815259458, |
| "grad_norm": 0.2505631262518965, |
| "learning_rate": 9.545908089417655e-06, |
| "loss": 0.2268, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.1181581233709816, |
| "grad_norm": 0.24831276616046985, |
| "learning_rate": 9.541304425103772e-06, |
| "loss": 0.2258, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.1213174314825054, |
| "grad_norm": 0.24804609760497534, |
| "learning_rate": 9.536678663540247e-06, |
| "loss": 0.2232, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.124476739594029, |
| "grad_norm": 0.2268843330169224, |
| "learning_rate": 9.532030827235285e-06, |
| "loss": 0.2223, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.1276360477055525, |
| "grad_norm": 0.22228229533403326, |
| "learning_rate": 9.527360938804503e-06, |
| "loss": 0.2261, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.130795355817076, |
| "grad_norm": 0.25616837563557016, |
| "learning_rate": 9.522669020970821e-06, |
| "loss": 0.2248, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.1339546639285996, |
| "grad_norm": 0.24799181444300183, |
| "learning_rate": 9.517955096564344e-06, |
| "loss": 0.2249, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.1371139720401233, |
| "grad_norm": 0.2382662787923193, |
| "learning_rate": 9.513219188522266e-06, |
| "loss": 0.2216, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.1402732801516469, |
| "grad_norm": 0.25171104996591714, |
| "learning_rate": 9.508461319888744e-06, |
| "loss": 0.2225, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.1434325882631704, |
| "grad_norm": 0.23415739363429328, |
| "learning_rate": 9.503681513814797e-06, |
| "loss": 0.2236, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.146591896374694, |
| "grad_norm": 0.2432154138833874, |
| "learning_rate": 9.498879793558184e-06, |
| "loss": 0.2234, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.1497512044862175, |
| "grad_norm": 0.22687839285475797, |
| "learning_rate": 9.494056182483293e-06, |
| "loss": 0.2222, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.152910512597741, |
| "grad_norm": 0.26826441248684857, |
| "learning_rate": 9.489210704061036e-06, |
| "loss": 0.2216, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.1560698207092646, |
| "grad_norm": 0.24724769280109898, |
| "learning_rate": 9.484343381868722e-06, |
| "loss": 0.2242, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.1592291288207883, |
| "grad_norm": 0.2584281422449668, |
| "learning_rate": 9.479454239589948e-06, |
| "loss": 0.2248, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.1623884369323119, |
| "grad_norm": 0.2445110224992667, |
| "learning_rate": 9.47454330101449e-06, |
| "loss": 0.2207, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.1655477450438354, |
| "grad_norm": 0.22768485500843333, |
| "learning_rate": 9.469610590038175e-06, |
| "loss": 0.2231, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.168707053155359, |
| "grad_norm": 0.23303026391453427, |
| "learning_rate": 9.464656130662775e-06, |
| "loss": 0.2237, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.1718663612668825, |
| "grad_norm": 0.2489422466683634, |
| "learning_rate": 9.45967994699588e-06, |
| "loss": 0.2249, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.1750256693784062, |
| "grad_norm": 0.26953611174046954, |
| "learning_rate": 9.454682063250798e-06, |
| "loss": 0.2214, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.1781849774899298, |
| "grad_norm": 0.24590519797285548, |
| "learning_rate": 9.449662503746416e-06, |
| "loss": 0.2238, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.1813442856014533, |
| "grad_norm": 0.23511526883880332, |
| "learning_rate": 9.444621292907095e-06, |
| "loss": 0.2224, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.1845035937129769, |
| "grad_norm": 0.22848149314341606, |
| "learning_rate": 9.439558455262547e-06, |
| "loss": 0.2214, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.1876629018245004, |
| "grad_norm": 0.25030584511199394, |
| "learning_rate": 9.43447401544772e-06, |
| "loss": 0.2206, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.190822209936024, |
| "grad_norm": 0.2395716904991911, |
| "learning_rate": 9.429367998202671e-06, |
| "loss": 0.2203, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.1939815180475475, |
| "grad_norm": 0.24389646038824445, |
| "learning_rate": 9.424240428372454e-06, |
| "loss": 0.2231, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.1971408261590712, |
| "grad_norm": 0.2511376763314995, |
| "learning_rate": 9.419091330906985e-06, |
| "loss": 0.2229, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.2003001342705948, |
| "grad_norm": 0.2309948734333885, |
| "learning_rate": 9.413920730860936e-06, |
| "loss": 0.2217, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.2034594423821183, |
| "grad_norm": 0.22403672252100287, |
| "learning_rate": 9.408728653393613e-06, |
| "loss": 0.2209, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.2066187504936419, |
| "grad_norm": 0.23770720039134421, |
| "learning_rate": 9.403515123768817e-06, |
| "loss": 0.2262, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.2097780586051654, |
| "grad_norm": 0.23637922011574056, |
| "learning_rate": 9.398280167354737e-06, |
| "loss": 0.2211, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.2129373667166892, |
| "grad_norm": 0.23752969597662485, |
| "learning_rate": 9.39302380962382e-06, |
| "loss": 0.2232, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.2160966748282127, |
| "grad_norm": 0.23711964499813667, |
| "learning_rate": 9.38774607615265e-06, |
| "loss": 0.2199, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.2192559829397362, |
| "grad_norm": 0.2594025147223781, |
| "learning_rate": 9.382446992621822e-06, |
| "loss": 0.2219, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.2224152910512598, |
| "grad_norm": 0.24109832395545655, |
| "learning_rate": 9.377126584815812e-06, |
| "loss": 0.2212, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.2255745991627833, |
| "grad_norm": 0.22269010324866226, |
| "learning_rate": 9.371784878622863e-06, |
| "loss": 0.221, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.2287339072743069, |
| "grad_norm": 0.22390895916446435, |
| "learning_rate": 9.36642190003485e-06, |
| "loss": 0.2206, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.2318932153858304, |
| "grad_norm": 0.27627761911305704, |
| "learning_rate": 9.361037675147152e-06, |
| "loss": 0.2209, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.2350525234973542, |
| "grad_norm": 0.2420598861318583, |
| "learning_rate": 9.355632230158537e-06, |
| "loss": 0.2179, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.2382118316088777, |
| "grad_norm": 0.2325111241248699, |
| "learning_rate": 9.35020559137102e-06, |
| "loss": 0.2195, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.2413711397204013, |
| "grad_norm": 0.24210875124091263, |
| "learning_rate": 9.344757785189743e-06, |
| "loss": 0.2209, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.2445304478319248, |
| "grad_norm": 0.23465411344270035, |
| "learning_rate": 9.339288838122848e-06, |
| "loss": 0.2218, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.2476897559434483, |
| "grad_norm": 0.24940394557867132, |
| "learning_rate": 9.333798776781344e-06, |
| "loss": 0.2207, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.250849064054972, |
| "grad_norm": 0.24164502781099362, |
| "learning_rate": 9.328287627878974e-06, |
| "loss": 0.2239, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.2540083721664956, |
| "grad_norm": 0.24030120002479546, |
| "learning_rate": 9.322755418232094e-06, |
| "loss": 0.2222, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.2571676802780192, |
| "grad_norm": 0.24260325995919346, |
| "learning_rate": 9.317202174759541e-06, |
| "loss": 0.2205, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.2603269883895427, |
| "grad_norm": 0.24390555474193987, |
| "learning_rate": 9.311627924482494e-06, |
| "loss": 0.2201, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.2634862965010663, |
| "grad_norm": 0.2373559456916262, |
| "learning_rate": 9.306032694524346e-06, |
| "loss": 0.2211, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.2669615354237422, |
| "grad_norm": 0.23593037133395878, |
| "learning_rate": 9.300416512110582e-06, |
| "loss": 0.2212, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.2701208435352658, |
| "grad_norm": 0.2683209112120544, |
| "learning_rate": 9.29477940456863e-06, |
| "loss": 0.2233, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.2732801516467893, |
| "grad_norm": 0.24625101625854237, |
| "learning_rate": 9.289121399327742e-06, |
| "loss": 0.2204, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.2764394597583129, |
| "grad_norm": 0.24487225016448577, |
| "learning_rate": 9.283442523918848e-06, |
| "loss": 0.2216, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.2795987678698366, |
| "grad_norm": 0.22210615337013012, |
| "learning_rate": 9.27774280597444e-06, |
| "loss": 0.2215, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.2827580759813602, |
| "grad_norm": 0.22108031546901602, |
| "learning_rate": 9.272022273228414e-06, |
| "loss": 0.2204, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.2859173840928837, |
| "grad_norm": 0.24375060995624975, |
| "learning_rate": 9.266280953515958e-06, |
| "loss": 0.2206, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.2890766922044072, |
| "grad_norm": 0.2480993257402374, |
| "learning_rate": 9.260518874773395e-06, |
| "loss": 0.2241, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.2922360003159308, |
| "grad_norm": 0.22812840181798105, |
| "learning_rate": 9.25473606503807e-06, |
| "loss": 0.2229, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.2953953084274543, |
| "grad_norm": 0.23578170889449204, |
| "learning_rate": 9.248932552448191e-06, |
| "loss": 0.2202, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.2985546165389779, |
| "grad_norm": 0.23821605850175842, |
| "learning_rate": 9.24310836524271e-06, |
| "loss": 0.223, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.3017139246505016, |
| "grad_norm": 0.22354575939426694, |
| "learning_rate": 9.237263531761178e-06, |
| "loss": 0.2206, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.3048732327620252, |
| "grad_norm": 0.25832314829209013, |
| "learning_rate": 9.2313980804436e-06, |
| "loss": 0.2231, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.3080325408735487, |
| "grad_norm": 0.25538340887467387, |
| "learning_rate": 9.225512039830316e-06, |
| "loss": 0.2175, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.3111918489850722, |
| "grad_norm": 0.25215426741015406, |
| "learning_rate": 9.219605438561836e-06, |
| "loss": 0.2237, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.3143511570965958, |
| "grad_norm": 0.2313336444417292, |
| "learning_rate": 9.213678305378728e-06, |
| "loss": 0.2172, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.3175104652081195, |
| "grad_norm": 0.2278642515048215, |
| "learning_rate": 9.207730669121458e-06, |
| "loss": 0.2198, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.320669773319643, |
| "grad_norm": 0.2202393432518963, |
| "learning_rate": 9.201762558730256e-06, |
| "loss": 0.2217, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.3238290814311666, |
| "grad_norm": 0.23993000450443083, |
| "learning_rate": 9.19577400324498e-06, |
| "loss": 0.218, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.3269883895426902, |
| "grad_norm": 0.21595002622084153, |
| "learning_rate": 9.189765031804965e-06, |
| "loss": 0.2176, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.3301476976542137, |
| "grad_norm": 0.2526302368911067, |
| "learning_rate": 9.183735673648893e-06, |
| "loss": 0.2201, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.3333070057657372, |
| "grad_norm": 0.2431796699930924, |
| "learning_rate": 9.177685958114641e-06, |
| "loss": 0.2197, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.3364663138772608, |
| "grad_norm": 0.22807380467388666, |
| "learning_rate": 9.171615914639143e-06, |
| "loss": 0.2184, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.3396256219887843, |
| "grad_norm": 0.23018273423897978, |
| "learning_rate": 9.16552557275824e-06, |
| "loss": 0.2212, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.342784930100308, |
| "grad_norm": 0.25116936857534927, |
| "learning_rate": 9.159414962106551e-06, |
| "loss": 0.2205, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.3459442382118316, |
| "grad_norm": 0.2535484242407173, |
| "learning_rate": 9.153284112417314e-06, |
| "loss": 0.2173, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.3491035463233552, |
| "grad_norm": 0.21916754861180746, |
| "learning_rate": 9.147133053522243e-06, |
| "loss": 0.2206, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.3522628544348787, |
| "grad_norm": 0.24428352296386502, |
| "learning_rate": 9.140961815351399e-06, |
| "loss": 0.2203, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.3554221625464025, |
| "grad_norm": 0.22564630207596492, |
| "learning_rate": 9.13477042793302e-06, |
| "loss": 0.2185, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.358581470657926, |
| "grad_norm": 0.2203268383689168, |
| "learning_rate": 9.128558921393391e-06, |
| "loss": 0.2196, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.3617407787694495, |
| "grad_norm": 0.22885301789778367, |
| "learning_rate": 9.122327325956697e-06, |
| "loss": 0.2188, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.364900086880973, |
| "grad_norm": 0.23826459623192525, |
| "learning_rate": 9.116075671944865e-06, |
| "loss": 0.2196, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.3680593949924966, |
| "grad_norm": 0.23832523948959192, |
| "learning_rate": 9.109803989777432e-06, |
| "loss": 0.22, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.3712187031040202, |
| "grad_norm": 0.23458728809270832, |
| "learning_rate": 9.103512309971381e-06, |
| "loss": 0.2201, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.3743780112155437, |
| "grad_norm": 0.2505289166305433, |
| "learning_rate": 9.097200663141007e-06, |
| "loss": 0.2204, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.3775373193270672, |
| "grad_norm": 0.23547611606996885, |
| "learning_rate": 9.090869079997756e-06, |
| "loss": 0.2176, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.380696627438591, |
| "grad_norm": 0.23300517643120455, |
| "learning_rate": 9.084517591350085e-06, |
| "loss": 0.2196, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.3838559355501145, |
| "grad_norm": 0.23821378937778367, |
| "learning_rate": 9.078146228103302e-06, |
| "loss": 0.2205, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.387015243661638, |
| "grad_norm": 0.24777379778962433, |
| "learning_rate": 9.07175502125943e-06, |
| "loss": 0.217, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.3901745517731616, |
| "grad_norm": 0.2475049075002878, |
| "learning_rate": 9.065344001917042e-06, |
| "loss": 0.2182, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.3933338598846854, |
| "grad_norm": 0.22578213255742105, |
| "learning_rate": 9.058913201271116e-06, |
| "loss": 0.2193, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.396493167996209, |
| "grad_norm": 0.2363691948058237, |
| "learning_rate": 9.052462650612886e-06, |
| "loss": 0.2203, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.3996524761077325, |
| "grad_norm": 0.22912256425928543, |
| "learning_rate": 9.045992381329678e-06, |
| "loss": 0.219, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.402811784219256, |
| "grad_norm": 0.24248708988856363, |
| "learning_rate": 9.039502424904778e-06, |
| "loss": 0.2197, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.4059710923307795, |
| "grad_norm": 0.24586583420100006, |
| "learning_rate": 9.032992812917253e-06, |
| "loss": 0.217, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.409130400442303, |
| "grad_norm": 0.2352960559617627, |
| "learning_rate": 9.026463577041823e-06, |
| "loss": 0.2187, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.4122897085538266, |
| "grad_norm": 0.24173709757573497, |
| "learning_rate": 9.019914749048689e-06, |
| "loss": 0.221, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.4154490166653502, |
| "grad_norm": 0.23957045024343024, |
| "learning_rate": 9.01334636080338e-06, |
| "loss": 0.2186, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.418608324776874, |
| "grad_norm": 0.2419319656288058, |
| "learning_rate": 9.00675844426661e-06, |
| "loss": 0.2194, |
| "step": 4490 |
| }, |
| { |
| "epoch": 1.4217676328883975, |
| "grad_norm": 0.21881593406458033, |
| "learning_rate": 9.00015103149411e-06, |
| "loss": 0.2186, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.424926940999921, |
| "grad_norm": 0.24136336388843918, |
| "learning_rate": 8.993524154636475e-06, |
| "loss": 0.2194, |
| "step": 4510 |
| }, |
| { |
| "epoch": 1.4280862491114446, |
| "grad_norm": 0.24438868998683702, |
| "learning_rate": 8.986877845939013e-06, |
| "loss": 0.2182, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.431245557222968, |
| "grad_norm": 0.2885217400112698, |
| "learning_rate": 8.980212137741584e-06, |
| "loss": 0.2177, |
| "step": 4530 |
| }, |
| { |
| "epoch": 1.4344048653344919, |
| "grad_norm": 0.24449644579392688, |
| "learning_rate": 8.973527062478438e-06, |
| "loss": 0.221, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.4375641734460154, |
| "grad_norm": 0.23524427018475397, |
| "learning_rate": 8.966822652678068e-06, |
| "loss": 0.2187, |
| "step": 4550 |
| }, |
| { |
| "epoch": 1.440723481557539, |
| "grad_norm": 0.2288850724812931, |
| "learning_rate": 8.960098940963042e-06, |
| "loss": 0.2181, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.4438827896690625, |
| "grad_norm": 0.21740524732083277, |
| "learning_rate": 8.953355960049848e-06, |
| "loss": 0.2171, |
| "step": 4570 |
| }, |
| { |
| "epoch": 1.447042097780586, |
| "grad_norm": 0.22515210418111187, |
| "learning_rate": 8.946593742748737e-06, |
| "loss": 0.2198, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.4502014058921096, |
| "grad_norm": 0.2241208354134438, |
| "learning_rate": 8.93981232196356e-06, |
| "loss": 0.2176, |
| "step": 4590 |
| }, |
| { |
| "epoch": 1.453360714003633, |
| "grad_norm": 0.23239653090715226, |
| "learning_rate": 8.933011730691609e-06, |
| "loss": 0.2193, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.4565200221151569, |
| "grad_norm": 0.2591474679289352, |
| "learning_rate": 8.926192002023457e-06, |
| "loss": 0.2152, |
| "step": 4610 |
| }, |
| { |
| "epoch": 1.4596793302266804, |
| "grad_norm": 0.25412246361370894, |
| "learning_rate": 8.919353169142794e-06, |
| "loss": 0.221, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.462838638338204, |
| "grad_norm": 0.24625096243975705, |
| "learning_rate": 8.912495265326274e-06, |
| "loss": 0.2176, |
| "step": 4630 |
| }, |
| { |
| "epoch": 1.4659979464497275, |
| "grad_norm": 0.23707218603543193, |
| "learning_rate": 8.905618323943337e-06, |
| "loss": 0.2195, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.469157254561251, |
| "grad_norm": 0.2428731037963279, |
| "learning_rate": 8.898722378456066e-06, |
| "loss": 0.2194, |
| "step": 4650 |
| }, |
| { |
| "epoch": 1.4723165626727748, |
| "grad_norm": 0.23709943181558119, |
| "learning_rate": 8.89180746241901e-06, |
| "loss": 0.2189, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.4754758707842983, |
| "grad_norm": 0.22433268675724657, |
| "learning_rate": 8.88487360947903e-06, |
| "loss": 0.2177, |
| "step": 4670 |
| }, |
| { |
| "epoch": 1.4786351788958219, |
| "grad_norm": 0.20952198638497543, |
| "learning_rate": 8.877920853375127e-06, |
| "loss": 0.2168, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.4817944870073454, |
| "grad_norm": 0.21717097039950753, |
| "learning_rate": 8.87094922793828e-06, |
| "loss": 0.2159, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.484953795118869, |
| "grad_norm": 0.23796957102894456, |
| "learning_rate": 8.86395876709129e-06, |
| "loss": 0.2151, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.4881131032303925, |
| "grad_norm": 0.2488834756996504, |
| "learning_rate": 8.856949504848602e-06, |
| "loss": 0.2154, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.491272411341916, |
| "grad_norm": 0.25338534020929043, |
| "learning_rate": 8.849921475316147e-06, |
| "loss": 0.2182, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.4944317194534398, |
| "grad_norm": 0.22919814445941372, |
| "learning_rate": 8.842874712691176e-06, |
| "loss": 0.2167, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.4975910275649633, |
| "grad_norm": 0.2256770534043575, |
| "learning_rate": 8.83580925126209e-06, |
| "loss": 0.2173, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.5007503356764869, |
| "grad_norm": 0.22659615457077606, |
| "learning_rate": 8.828725125408277e-06, |
| "loss": 0.2195, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.5039096437880104, |
| "grad_norm": 0.2525621807127862, |
| "learning_rate": 8.821622369599945e-06, |
| "loss": 0.2189, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.5070689518995342, |
| "grad_norm": 0.23576094914242274, |
| "learning_rate": 8.814501018397948e-06, |
| "loss": 0.2169, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.5102282600110577, |
| "grad_norm": 0.23522191103261228, |
| "learning_rate": 8.807361106453623e-06, |
| "loss": 0.2174, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.5133875681225812, |
| "grad_norm": 0.25147556181231623, |
| "learning_rate": 8.800202668508624e-06, |
| "loss": 0.2182, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.5165468762341048, |
| "grad_norm": 0.229827362444795, |
| "learning_rate": 8.793025739394747e-06, |
| "loss": 0.2188, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.5197061843456283, |
| "grad_norm": 0.23171793705210642, |
| "learning_rate": 8.78583035403376e-06, |
| "loss": 0.2192, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.5228654924571519, |
| "grad_norm": 0.24117108800540396, |
| "learning_rate": 8.778616547437244e-06, |
| "loss": 0.2154, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.5260248005686754, |
| "grad_norm": 0.2239538620184076, |
| "learning_rate": 8.771384354706407e-06, |
| "loss": 0.2189, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.529184108680199, |
| "grad_norm": 0.22643911431321556, |
| "learning_rate": 8.764133811031926e-06, |
| "loss": 0.219, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.5323434167917225, |
| "grad_norm": 0.2309154933998562, |
| "learning_rate": 8.756864951693767e-06, |
| "loss": 0.2161, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.5355027249032462, |
| "grad_norm": 0.22487805218107695, |
| "learning_rate": 8.749577812061019e-06, |
| "loss": 0.2155, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.5386620330147698, |
| "grad_norm": 0.2508153221101582, |
| "learning_rate": 8.74227242759172e-06, |
| "loss": 0.2165, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.5418213411262933, |
| "grad_norm": 0.2294176475790702, |
| "learning_rate": 8.734948833832684e-06, |
| "loss": 0.2194, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.544980649237817, |
| "grad_norm": 0.24196930828876226, |
| "learning_rate": 8.72760706641933e-06, |
| "loss": 0.2179, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.5481399573493406, |
| "grad_norm": 0.2067023139224669, |
| "learning_rate": 8.720247161075504e-06, |
| "loss": 0.2141, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.5512992654608642, |
| "grad_norm": 0.22150939402397368, |
| "learning_rate": 8.71286915361331e-06, |
| "loss": 0.2167, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.5544585735723877, |
| "grad_norm": 0.22765148037671673, |
| "learning_rate": 8.705473079932935e-06, |
| "loss": 0.2193, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.5576178816839112, |
| "grad_norm": 0.2541351407544811, |
| "learning_rate": 8.698058976022473e-06, |
| "loss": 0.2178, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.5607771897954348, |
| "grad_norm": 0.22926147481484557, |
| "learning_rate": 8.690626877957745e-06, |
| "loss": 0.217, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.5639364979069583, |
| "grad_norm": 0.22296251412940335, |
| "learning_rate": 8.683176821902135e-06, |
| "loss": 0.2169, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.5670958060184819, |
| "grad_norm": 0.20884559665213961, |
| "learning_rate": 8.675708844106407e-06, |
| "loss": 0.2177, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.5702551141300054, |
| "grad_norm": 0.24025626100283112, |
| "learning_rate": 8.668222980908527e-06, |
| "loss": 0.2197, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.573414422241529, |
| "grad_norm": 0.23773563954990934, |
| "learning_rate": 8.66071926873349e-06, |
| "loss": 0.2152, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.5765737303530527, |
| "grad_norm": 0.2145137444703373, |
| "learning_rate": 8.65319774409314e-06, |
| "loss": 0.2171, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.5797330384645762, |
| "grad_norm": 0.22033335790324296, |
| "learning_rate": 8.645658443585992e-06, |
| "loss": 0.2186, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.5828923465760998, |
| "grad_norm": 0.22804805519869795, |
| "learning_rate": 8.638101403897062e-06, |
| "loss": 0.2174, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.5860516546876235, |
| "grad_norm": 0.2289269600023062, |
| "learning_rate": 8.630526661797673e-06, |
| "loss": 0.2164, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.589210962799147, |
| "grad_norm": 0.2165226855369354, |
| "learning_rate": 8.622934254145292e-06, |
| "loss": 0.2173, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.5923702709106706, |
| "grad_norm": 0.21269449117197298, |
| "learning_rate": 8.615324217883341e-06, |
| "loss": 0.2158, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.5955295790221942, |
| "grad_norm": 0.21841160515610453, |
| "learning_rate": 8.607696590041021e-06, |
| "loss": 0.2181, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.5986888871337177, |
| "grad_norm": 0.23472942411314004, |
| "learning_rate": 8.60005140773313e-06, |
| "loss": 0.2158, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.6018481952452412, |
| "grad_norm": 0.2599264549024524, |
| "learning_rate": 8.592388708159881e-06, |
| "loss": 0.2202, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.6050075033567648, |
| "grad_norm": 0.22673012824944389, |
| "learning_rate": 8.584708528606728e-06, |
| "loss": 0.2165, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.6081668114682883, |
| "grad_norm": 0.2206529413228289, |
| "learning_rate": 8.577010906444174e-06, |
| "loss": 0.2187, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.6113261195798119, |
| "grad_norm": 0.22895351987221169, |
| "learning_rate": 8.569295879127602e-06, |
| "loss": 0.2159, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.6144854276913356, |
| "grad_norm": 0.23022447681447986, |
| "learning_rate": 8.56156348419708e-06, |
| "loss": 0.2157, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.6176447358028592, |
| "grad_norm": 0.2183798711751106, |
| "learning_rate": 8.553813759277185e-06, |
| "loss": 0.2169, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.6208040439143827, |
| "grad_norm": 0.22109788078597084, |
| "learning_rate": 8.546046742076819e-06, |
| "loss": 0.214, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.6239633520259065, |
| "grad_norm": 0.2690538258988973, |
| "learning_rate": 8.538262470389027e-06, |
| "loss": 0.2189, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.62712266013743, |
| "grad_norm": 0.22791429863696558, |
| "learning_rate": 8.530460982090812e-06, |
| "loss": 0.2146, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.6302819682489536, |
| "grad_norm": 0.23247506925321845, |
| "learning_rate": 8.522642315142948e-06, |
| "loss": 0.2174, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.633441276360477, |
| "grad_norm": 0.22524917238532172, |
| "learning_rate": 8.514806507589796e-06, |
| "loss": 0.2135, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.6366005844720006, |
| "grad_norm": 0.21074048952604266, |
| "learning_rate": 8.506953597559125e-06, |
| "loss": 0.2168, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.6397598925835242, |
| "grad_norm": 0.24438387341045803, |
| "learning_rate": 8.49908362326192e-06, |
| "loss": 0.2181, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.6429192006950477, |
| "grad_norm": 0.2486574671755116, |
| "learning_rate": 8.491196622992196e-06, |
| "loss": 0.2131, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.6460785088065713, |
| "grad_norm": 0.2256817140951327, |
| "learning_rate": 8.483292635126814e-06, |
| "loss": 0.2168, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.6492378169180948, |
| "grad_norm": 0.23268219491083575, |
| "learning_rate": 8.475371698125298e-06, |
| "loss": 0.2199, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.6523971250296186, |
| "grad_norm": 0.22549590195988278, |
| "learning_rate": 8.46743385052964e-06, |
| "loss": 0.2148, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.655556433141142, |
| "grad_norm": 0.22816476144990433, |
| "learning_rate": 8.459479130964114e-06, |
| "loss": 0.2161, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.6587157412526656, |
| "grad_norm": 0.22538580259762195, |
| "learning_rate": 8.451507578135099e-06, |
| "loss": 0.2153, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.6618750493641894, |
| "grad_norm": 0.2378857848685177, |
| "learning_rate": 8.443519230830871e-06, |
| "loss": 0.2165, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.665034357475713, |
| "grad_norm": 0.21964276783718112, |
| "learning_rate": 8.435514127921432e-06, |
| "loss": 0.2152, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.6681936655872365, |
| "grad_norm": 0.2278255608379614, |
| "learning_rate": 8.427492308358314e-06, |
| "loss": 0.2151, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.67135297369876, |
| "grad_norm": 0.22151461379504075, |
| "learning_rate": 8.419453811174384e-06, |
| "loss": 0.2159, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.6745122818102836, |
| "grad_norm": 0.23436228542360216, |
| "learning_rate": 8.411398675483668e-06, |
| "loss": 0.2139, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.677671589921807, |
| "grad_norm": 0.23116461868460544, |
| "learning_rate": 8.403326940481146e-06, |
| "loss": 0.2141, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.6808308980333306, |
| "grad_norm": 0.22621704434121967, |
| "learning_rate": 8.39523864544257e-06, |
| "loss": 0.2178, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.6839902061448542, |
| "grad_norm": 0.24437450666792623, |
| "learning_rate": 8.387133829724266e-06, |
| "loss": 0.2148, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.6871495142563777, |
| "grad_norm": 0.2285633102632245, |
| "learning_rate": 8.379012532762956e-06, |
| "loss": 0.211, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.6903088223679015, |
| "grad_norm": 0.23322244192934505, |
| "learning_rate": 8.370874794075548e-06, |
| "loss": 0.2159, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.693468130479425, |
| "grad_norm": 0.22379930823118785, |
| "learning_rate": 8.36272065325896e-06, |
| "loss": 0.2139, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.6966274385909486, |
| "grad_norm": 0.2120873572584099, |
| "learning_rate": 8.354550149989912e-06, |
| "loss": 0.2152, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.6997867467024723, |
| "grad_norm": 0.2274107944742338, |
| "learning_rate": 8.346363324024752e-06, |
| "loss": 0.2154, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.7029460548139959, |
| "grad_norm": 0.22062326975674185, |
| "learning_rate": 8.338160215199239e-06, |
| "loss": 0.213, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.7061053629255194, |
| "grad_norm": 0.20566039440830033, |
| "learning_rate": 8.329940863428372e-06, |
| "loss": 0.2142, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.709264671037043, |
| "grad_norm": 0.22328688203535454, |
| "learning_rate": 8.321705308706178e-06, |
| "loss": 0.2174, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.7124239791485665, |
| "grad_norm": 0.2284198217298347, |
| "learning_rate": 8.313453591105534e-06, |
| "loss": 0.2166, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.71558328726009, |
| "grad_norm": 0.24234082572473092, |
| "learning_rate": 8.305185750777951e-06, |
| "loss": 0.2153, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.7187425953716136, |
| "grad_norm": 0.21687647127855095, |
| "learning_rate": 8.296901827953403e-06, |
| "loss": 0.2164, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.721901903483137, |
| "grad_norm": 0.2121864508017843, |
| "learning_rate": 8.288601862940109e-06, |
| "loss": 0.2139, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.7250612115946606, |
| "grad_norm": 0.22730841948699315, |
| "learning_rate": 8.280285896124351e-06, |
| "loss": 0.2174, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.7282205197061844, |
| "grad_norm": 0.2364301598385313, |
| "learning_rate": 8.271953967970273e-06, |
| "loss": 0.214, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.731379827817708, |
| "grad_norm": 0.2502748891595353, |
| "learning_rate": 8.263606119019684e-06, |
| "loss": 0.2158, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.7345391359292315, |
| "grad_norm": 0.23803855035434604, |
| "learning_rate": 8.255242389891863e-06, |
| "loss": 0.217, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.737698444040755, |
| "grad_norm": 0.23212647695583102, |
| "learning_rate": 8.246862821283354e-06, |
| "loss": 0.2147, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.7408577521522788, |
| "grad_norm": 0.24431104358398667, |
| "learning_rate": 8.238467453967778e-06, |
| "loss": 0.2154, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.7440170602638023, |
| "grad_norm": 0.21470725579185915, |
| "learning_rate": 8.23005632879563e-06, |
| "loss": 0.2141, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.7471763683753259, |
| "grad_norm": 0.21322893537867463, |
| "learning_rate": 8.221629486694076e-06, |
| "loss": 0.2137, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.7503356764868494, |
| "grad_norm": 0.22024980684855178, |
| "learning_rate": 8.213186968666761e-06, |
| "loss": 0.216, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.753494984598373, |
| "grad_norm": 0.21887176967393748, |
| "learning_rate": 8.20472881579361e-06, |
| "loss": 0.2162, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.7566542927098965, |
| "grad_norm": 0.21439885224310692, |
| "learning_rate": 8.196255069230618e-06, |
| "loss": 0.215, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.75981360082142, |
| "grad_norm": 0.22279903289185554, |
| "learning_rate": 8.187765770209662e-06, |
| "loss": 0.2149, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.7629729089329436, |
| "grad_norm": 0.23141919021697108, |
| "learning_rate": 8.179260960038286e-06, |
| "loss": 0.2158, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.766132217044467, |
| "grad_norm": 0.21177829542109405, |
| "learning_rate": 8.17074068009952e-06, |
| "loss": 0.2162, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.7692915251559909, |
| "grad_norm": 0.23088215353230407, |
| "learning_rate": 8.162204971851662e-06, |
| "loss": 0.215, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.7724508332675144, |
| "grad_norm": 0.22123248711116683, |
| "learning_rate": 8.153653876828081e-06, |
| "loss": 0.2128, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.775610141379038, |
| "grad_norm": 0.21216354452493327, |
| "learning_rate": 8.145087436637014e-06, |
| "loss": 0.2161, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.7787694494905617, |
| "grad_norm": 0.2176779002539683, |
| "learning_rate": 8.13650569296137e-06, |
| "loss": 0.2147, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.7819287576020852, |
| "grad_norm": 0.22379805142011377, |
| "learning_rate": 8.12790868755852e-06, |
| "loss": 0.2149, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.7850880657136088, |
| "grad_norm": 0.23156638443169314, |
| "learning_rate": 8.119296462260094e-06, |
| "loss": 0.2116, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.7882473738251323, |
| "grad_norm": 0.23142885687079817, |
| "learning_rate": 8.110669058971783e-06, |
| "loss": 0.2168, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.7914066819366559, |
| "grad_norm": 0.21278539872224353, |
| "learning_rate": 8.102026519673127e-06, |
| "loss": 0.2136, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.7945659900481794, |
| "grad_norm": 0.22604551176497345, |
| "learning_rate": 8.093368886417323e-06, |
| "loss": 0.2139, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.797725298159703, |
| "grad_norm": 0.21945486381827597, |
| "learning_rate": 8.084696201331005e-06, |
| "loss": 0.2148, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.8008846062712265, |
| "grad_norm": 0.2554690895004349, |
| "learning_rate": 8.07600850661405e-06, |
| "loss": 0.215, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.80404391438275, |
| "grad_norm": 0.24996286626429107, |
| "learning_rate": 8.067305844539369e-06, |
| "loss": 0.2183, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.8072032224942738, |
| "grad_norm": 0.24383324293020725, |
| "learning_rate": 8.058588257452705e-06, |
| "loss": 0.2121, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.8103625306057973, |
| "grad_norm": 0.2169636811525225, |
| "learning_rate": 8.049855787772416e-06, |
| "loss": 0.2145, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.8135218387173209, |
| "grad_norm": 0.24944768584905086, |
| "learning_rate": 8.041108477989283e-06, |
| "loss": 0.2141, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.8166811468288446, |
| "grad_norm": 0.21757143809376578, |
| "learning_rate": 8.032346370666297e-06, |
| "loss": 0.2138, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.8198404549403682, |
| "grad_norm": 0.20693258010147997, |
| "learning_rate": 8.023569508438444e-06, |
| "loss": 0.2159, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.8229997630518917, |
| "grad_norm": 0.21253234218456654, |
| "learning_rate": 8.014777934012515e-06, |
| "loss": 0.2156, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.8261590711634152, |
| "grad_norm": 0.2199083063027526, |
| "learning_rate": 8.005971690166879e-06, |
| "loss": 0.2147, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.8293183792749388, |
| "grad_norm": 0.24052412314651891, |
| "learning_rate": 7.99715081975129e-06, |
| "loss": 0.2129, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.8324776873864623, |
| "grad_norm": 0.21391433161968806, |
| "learning_rate": 7.98831536568667e-06, |
| "loss": 0.2142, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.8356369954979859, |
| "grad_norm": 0.2071590477738613, |
| "learning_rate": 7.979465370964904e-06, |
| "loss": 0.2111, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.8387963036095094, |
| "grad_norm": 0.20813594996772833, |
| "learning_rate": 7.97060087864863e-06, |
| "loss": 0.2139, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.841955611721033, |
| "grad_norm": 0.21828875123993477, |
| "learning_rate": 7.961721931871023e-06, |
| "loss": 0.2125, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.8451149198325567, |
| "grad_norm": 0.22962238030556992, |
| "learning_rate": 7.9528285738356e-06, |
| "loss": 0.214, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.8482742279440803, |
| "grad_norm": 0.22398065321127122, |
| "learning_rate": 7.943920847815995e-06, |
| "loss": 0.2111, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.8514335360556038, |
| "grad_norm": 0.23666023387603707, |
| "learning_rate": 7.934998797155757e-06, |
| "loss": 0.2136, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.8545928441671276, |
| "grad_norm": 0.24325581332600177, |
| "learning_rate": 7.926062465268133e-06, |
| "loss": 0.2146, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.857752152278651, |
| "grad_norm": 0.22514971896352026, |
| "learning_rate": 7.917111895635865e-06, |
| "loss": 0.2143, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.8609114603901746, |
| "grad_norm": 0.21406995986557276, |
| "learning_rate": 7.908147131810968e-06, |
| "loss": 0.2147, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.8640707685016982, |
| "grad_norm": 0.22801297726410702, |
| "learning_rate": 7.899168217414526e-06, |
| "loss": 0.2124, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.8672300766132217, |
| "grad_norm": 0.20686065594624745, |
| "learning_rate": 7.890175196136484e-06, |
| "loss": 0.2128, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.8703893847247453, |
| "grad_norm": 0.219199870102877, |
| "learning_rate": 7.881168111735417e-06, |
| "loss": 0.212, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.8735486928362688, |
| "grad_norm": 0.2106377311590809, |
| "learning_rate": 7.872147008038335e-06, |
| "loss": 0.2131, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.8767080009477923, |
| "grad_norm": 0.22171325213770007, |
| "learning_rate": 7.863111928940465e-06, |
| "loss": 0.2144, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.8798673090593159, |
| "grad_norm": 0.2408651871754287, |
| "learning_rate": 7.854062918405034e-06, |
| "loss": 0.2145, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.8830266171708396, |
| "grad_norm": 0.22645870425489717, |
| "learning_rate": 7.845000020463058e-06, |
| "loss": 0.2157, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.8861859252823632, |
| "grad_norm": 0.21229610496145646, |
| "learning_rate": 7.835923279213124e-06, |
| "loss": 0.2153, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.8893452333938867, |
| "grad_norm": 0.22168579889983991, |
| "learning_rate": 7.826832738821182e-06, |
| "loss": 0.2135, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.8925045415054105, |
| "grad_norm": 0.2503714943772614, |
| "learning_rate": 7.817728443520324e-06, |
| "loss": 0.214, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.895663849616934, |
| "grad_norm": 0.21389350571126534, |
| "learning_rate": 7.808610437610572e-06, |
| "loss": 0.2139, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.8988231577284576, |
| "grad_norm": 0.21396113064462294, |
| "learning_rate": 7.799478765458665e-06, |
| "loss": 0.215, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.901982465839981, |
| "grad_norm": 0.21646665487064554, |
| "learning_rate": 7.790333471497831e-06, |
| "loss": 0.2137, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.9051417739515046, |
| "grad_norm": 0.2173302972697527, |
| "learning_rate": 7.781174600227587e-06, |
| "loss": 0.215, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.9083010820630282, |
| "grad_norm": 0.208526571079878, |
| "learning_rate": 7.772002196213517e-06, |
| "loss": 0.2144, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.9114603901745517, |
| "grad_norm": 0.23762678091854472, |
| "learning_rate": 7.762816304087042e-06, |
| "loss": 0.2154, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.9146196982860753, |
| "grad_norm": 0.23394045959731094, |
| "learning_rate": 7.753616968545223e-06, |
| "loss": 0.214, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.9177790063975988, |
| "grad_norm": 0.21611757282074848, |
| "learning_rate": 7.744404234350536e-06, |
| "loss": 0.2125, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.9209383145091226, |
| "grad_norm": 0.21843363212182215, |
| "learning_rate": 7.735178146330647e-06, |
| "loss": 0.2119, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.924097622620646, |
| "grad_norm": 0.20825290374476896, |
| "learning_rate": 7.7259387493782e-06, |
| "loss": 0.2114, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.9272569307321696, |
| "grad_norm": 0.20701940040618752, |
| "learning_rate": 7.716686088450601e-06, |
| "loss": 0.2118, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.9304162388436932, |
| "grad_norm": 0.22116343279147632, |
| "learning_rate": 7.707420208569793e-06, |
| "loss": 0.2114, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.933575546955217, |
| "grad_norm": 0.21636101800504964, |
| "learning_rate": 7.698141154822048e-06, |
| "loss": 0.216, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.9367348550667405, |
| "grad_norm": 0.21079410410879035, |
| "learning_rate": 7.68884897235773e-06, |
| "loss": 0.213, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.939894163178264, |
| "grad_norm": 0.24490335061347607, |
| "learning_rate": 7.679543706391088e-06, |
| "loss": 0.2128, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.9430534712897876, |
| "grad_norm": 0.22964720738637706, |
| "learning_rate": 7.670225402200037e-06, |
| "loss": 0.2111, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.946212779401311, |
| "grad_norm": 0.21860013623402114, |
| "learning_rate": 7.660894105125932e-06, |
| "loss": 0.2134, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.9493720875128346, |
| "grad_norm": 0.2564661587161801, |
| "learning_rate": 7.651549860573347e-06, |
| "loss": 0.2097, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.9525313956243582, |
| "grad_norm": 0.22698394312118578, |
| "learning_rate": 7.642192714009861e-06, |
| "loss": 0.2124, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.9556907037358817, |
| "grad_norm": 0.2076381468352638, |
| "learning_rate": 7.632822710965826e-06, |
| "loss": 0.2115, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.9588500118474053, |
| "grad_norm": 0.2262277080179204, |
| "learning_rate": 7.623439897034155e-06, |
| "loss": 0.2161, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.962009319958929, |
| "grad_norm": 0.22968981943777275, |
| "learning_rate": 7.614044317870099e-06, |
| "loss": 0.212, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.9651686280704526, |
| "grad_norm": 0.21734537077847677, |
| "learning_rate": 7.604636019191018e-06, |
| "loss": 0.2112, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.968327936181976, |
| "grad_norm": 0.2162091973430056, |
| "learning_rate": 7.595215046776165e-06, |
| "loss": 0.2112, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.9714872442934999, |
| "grad_norm": 0.22143440320403707, |
| "learning_rate": 7.585781446466464e-06, |
| "loss": 0.2108, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.9746465524050234, |
| "grad_norm": 0.21858157789999919, |
| "learning_rate": 7.5763352641642785e-06, |
| "loss": 0.2145, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.977805860516547, |
| "grad_norm": 0.23297936516023993, |
| "learning_rate": 7.566876545833197e-06, |
| "loss": 0.2123, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.9809651686280705, |
| "grad_norm": 0.22908027579219403, |
| "learning_rate": 7.55740533749781e-06, |
| "loss": 0.2114, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.984124476739594, |
| "grad_norm": 0.2202073126271702, |
| "learning_rate": 7.547921685243475e-06, |
| "loss": 0.2113, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.9872837848511176, |
| "grad_norm": 0.2309779316659973, |
| "learning_rate": 7.538425635216105e-06, |
| "loss": 0.2136, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.990443092962641, |
| "grad_norm": 0.2116437633996568, |
| "learning_rate": 7.5289172336219375e-06, |
| "loss": 0.2122, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.9936024010741646, |
| "grad_norm": 0.21586666623876277, |
| "learning_rate": 7.51939652672731e-06, |
| "loss": 0.2098, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.9967617091856882, |
| "grad_norm": 0.22548426849872574, |
| "learning_rate": 7.509863560858432e-06, |
| "loss": 0.2114, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.999921017297212, |
| "grad_norm": 0.2263338267805787, |
| "learning_rate": 7.5003183824011726e-06, |
| "loss": 0.2131, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.0031593081115235, |
| "grad_norm": 0.2127546790886278, |
| "learning_rate": 7.490761037800816e-06, |
| "loss": 0.1968, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.006318616223047, |
| "grad_norm": 0.2069882189331785, |
| "learning_rate": 7.48119157356185e-06, |
| "loss": 0.1914, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.0094779243345706, |
| "grad_norm": 0.20703568644153889, |
| "learning_rate": 7.471610036247733e-06, |
| "loss": 0.1897, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.012637232446094, |
| "grad_norm": 0.2055646371637049, |
| "learning_rate": 7.462016472480668e-06, |
| "loss": 0.194, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.0157965405576177, |
| "grad_norm": 0.2226339706612743, |
| "learning_rate": 7.452410928941378e-06, |
| "loss": 0.1921, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.0189558486691412, |
| "grad_norm": 0.22355427767751265, |
| "learning_rate": 7.442793452368879e-06, |
| "loss": 0.1918, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.0221151567806652, |
| "grad_norm": 0.2073037658523876, |
| "learning_rate": 7.433164089560251e-06, |
| "loss": 0.192, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.0252744648921888, |
| "grad_norm": 0.23504421002096731, |
| "learning_rate": 7.423522887370405e-06, |
| "loss": 0.1904, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.0284337730037123, |
| "grad_norm": 0.20581970412966663, |
| "learning_rate": 7.413869892711867e-06, |
| "loss": 0.1918, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.031593081115236, |
| "grad_norm": 0.21545977722772292, |
| "learning_rate": 7.40420515255454e-06, |
| "loss": 0.1953, |
| "step": 6430 |
| }, |
| { |
| "epoch": 2.0347523892267594, |
| "grad_norm": 0.21275493409460883, |
| "learning_rate": 7.394528713925482e-06, |
| "loss": 0.1926, |
| "step": 6440 |
| }, |
| { |
| "epoch": 2.037911697338283, |
| "grad_norm": 0.2100072176135639, |
| "learning_rate": 7.38484062390867e-06, |
| "loss": 0.1928, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.0410710054498065, |
| "grad_norm": 0.2152551253488428, |
| "learning_rate": 7.375140929644776e-06, |
| "loss": 0.1913, |
| "step": 6460 |
| }, |
| { |
| "epoch": 2.04423031356133, |
| "grad_norm": 0.23722168142518216, |
| "learning_rate": 7.365429678330938e-06, |
| "loss": 0.193, |
| "step": 6470 |
| }, |
| { |
| "epoch": 2.0473896216728535, |
| "grad_norm": 0.21165262894718706, |
| "learning_rate": 7.355706917220524e-06, |
| "loss": 0.1919, |
| "step": 6480 |
| }, |
| { |
| "epoch": 2.050548929784377, |
| "grad_norm": 0.19900789144822892, |
| "learning_rate": 7.345972693622916e-06, |
| "loss": 0.1934, |
| "step": 6490 |
| }, |
| { |
| "epoch": 2.0537082378959006, |
| "grad_norm": 0.22490326358812657, |
| "learning_rate": 7.336227054903258e-06, |
| "loss": 0.1947, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.056867546007424, |
| "grad_norm": 0.2140405521043961, |
| "learning_rate": 7.3264700484822504e-06, |
| "loss": 0.1916, |
| "step": 6510 |
| }, |
| { |
| "epoch": 2.060026854118948, |
| "grad_norm": 0.21185203080938914, |
| "learning_rate": 7.316701721835899e-06, |
| "loss": 0.1921, |
| "step": 6520 |
| }, |
| { |
| "epoch": 2.0631861622304717, |
| "grad_norm": 0.21223665536640837, |
| "learning_rate": 7.306922122495296e-06, |
| "loss": 0.1906, |
| "step": 6530 |
| }, |
| { |
| "epoch": 2.0663454703419952, |
| "grad_norm": 0.21789344473823863, |
| "learning_rate": 7.297131298046381e-06, |
| "loss": 0.1912, |
| "step": 6540 |
| }, |
| { |
| "epoch": 2.0695047784535188, |
| "grad_norm": 0.2186374831032629, |
| "learning_rate": 7.287329296129716e-06, |
| "loss": 0.1905, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.0726640865650423, |
| "grad_norm": 0.21221974732525253, |
| "learning_rate": 7.2775161644402504e-06, |
| "loss": 0.1911, |
| "step": 6560 |
| }, |
| { |
| "epoch": 2.075823394676566, |
| "grad_norm": 0.22468404766263275, |
| "learning_rate": 7.267691950727089e-06, |
| "loss": 0.1918, |
| "step": 6570 |
| }, |
| { |
| "epoch": 2.0789827027880894, |
| "grad_norm": 0.22280157355766247, |
| "learning_rate": 7.257856702793262e-06, |
| "loss": 0.1953, |
| "step": 6580 |
| }, |
| { |
| "epoch": 2.082142010899613, |
| "grad_norm": 0.2185553426185545, |
| "learning_rate": 7.248010468495486e-06, |
| "loss": 0.1927, |
| "step": 6590 |
| }, |
| { |
| "epoch": 2.0853013190111365, |
| "grad_norm": 0.21359923659529395, |
| "learning_rate": 7.238153295743936e-06, |
| "loss": 0.1914, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.08846062712266, |
| "grad_norm": 0.21132207896334118, |
| "learning_rate": 7.228285232502016e-06, |
| "loss": 0.1916, |
| "step": 6610 |
| }, |
| { |
| "epoch": 2.0916199352341835, |
| "grad_norm": 0.22929120062926403, |
| "learning_rate": 7.218406326786119e-06, |
| "loss": 0.1927, |
| "step": 6620 |
| }, |
| { |
| "epoch": 2.094779243345707, |
| "grad_norm": 0.21485998840313886, |
| "learning_rate": 7.208516626665394e-06, |
| "loss": 0.1937, |
| "step": 6630 |
| }, |
| { |
| "epoch": 2.097938551457231, |
| "grad_norm": 0.21872022787914924, |
| "learning_rate": 7.198616180261515e-06, |
| "loss": 0.1925, |
| "step": 6640 |
| }, |
| { |
| "epoch": 2.1010978595687546, |
| "grad_norm": 0.2221942323992186, |
| "learning_rate": 7.188705035748447e-06, |
| "loss": 0.191, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.104257167680278, |
| "grad_norm": 0.23507618605208405, |
| "learning_rate": 7.178783241352209e-06, |
| "loss": 0.1913, |
| "step": 6660 |
| }, |
| { |
| "epoch": 2.1074164757918017, |
| "grad_norm": 0.2124831637924298, |
| "learning_rate": 7.168850845350642e-06, |
| "loss": 0.1937, |
| "step": 6670 |
| }, |
| { |
| "epoch": 2.1105757839033252, |
| "grad_norm": 0.27819912434383975, |
| "learning_rate": 7.158907896073171e-06, |
| "loss": 0.1907, |
| "step": 6680 |
| }, |
| { |
| "epoch": 2.1137350920148488, |
| "grad_norm": 0.2140478018660185, |
| "learning_rate": 7.148954441900569e-06, |
| "loss": 0.1937, |
| "step": 6690 |
| }, |
| { |
| "epoch": 2.1168944001263723, |
| "grad_norm": 0.20813723734002745, |
| "learning_rate": 7.13899053126473e-06, |
| "loss": 0.1928, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.120053708237896, |
| "grad_norm": 0.21914914363643226, |
| "learning_rate": 7.1290162126484255e-06, |
| "loss": 0.1911, |
| "step": 6710 |
| }, |
| { |
| "epoch": 2.1232130163494194, |
| "grad_norm": 0.20921158225021147, |
| "learning_rate": 7.119031534585068e-06, |
| "loss": 0.1911, |
| "step": 6720 |
| }, |
| { |
| "epoch": 2.126372324460943, |
| "grad_norm": 0.21063028898492417, |
| "learning_rate": 7.109036545658478e-06, |
| "loss": 0.1909, |
| "step": 6730 |
| }, |
| { |
| "epoch": 2.1295316325724665, |
| "grad_norm": 0.20519838588236927, |
| "learning_rate": 7.099031294502651e-06, |
| "loss": 0.1926, |
| "step": 6740 |
| }, |
| { |
| "epoch": 2.13269094068399, |
| "grad_norm": 0.19866370874843575, |
| "learning_rate": 7.089015829801513e-06, |
| "loss": 0.1918, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.1358502487955136, |
| "grad_norm": 0.23526951667689947, |
| "learning_rate": 7.078990200288685e-06, |
| "loss": 0.1914, |
| "step": 6760 |
| }, |
| { |
| "epoch": 2.1390095569070375, |
| "grad_norm": 0.2119435665033379, |
| "learning_rate": 7.0689544547472564e-06, |
| "loss": 0.1926, |
| "step": 6770 |
| }, |
| { |
| "epoch": 2.142168865018561, |
| "grad_norm": 0.2144608848885566, |
| "learning_rate": 7.058908642009532e-06, |
| "loss": 0.1929, |
| "step": 6780 |
| }, |
| { |
| "epoch": 2.1453281731300846, |
| "grad_norm": 0.2359992087538063, |
| "learning_rate": 7.048852810956805e-06, |
| "loss": 0.1939, |
| "step": 6790 |
| }, |
| { |
| "epoch": 2.148487481241608, |
| "grad_norm": 0.2210600215142186, |
| "learning_rate": 7.038787010519117e-06, |
| "loss": 0.1926, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.1516467893531317, |
| "grad_norm": 0.24951718000615125, |
| "learning_rate": 7.0287112896750166e-06, |
| "loss": 0.1911, |
| "step": 6810 |
| }, |
| { |
| "epoch": 2.1548060974646552, |
| "grad_norm": 0.2175553461891265, |
| "learning_rate": 7.018625697451327e-06, |
| "loss": 0.1934, |
| "step": 6820 |
| }, |
| { |
| "epoch": 2.1579654055761788, |
| "grad_norm": 0.20956901895344687, |
| "learning_rate": 7.008530282922896e-06, |
| "loss": 0.1913, |
| "step": 6830 |
| }, |
| { |
| "epoch": 2.1611247136877023, |
| "grad_norm": 0.2224814576315156, |
| "learning_rate": 6.998425095212378e-06, |
| "loss": 0.1927, |
| "step": 6840 |
| }, |
| { |
| "epoch": 2.164284021799226, |
| "grad_norm": 0.20337670529721538, |
| "learning_rate": 6.9883101834899694e-06, |
| "loss": 0.1922, |
| "step": 6850 |
| }, |
| { |
| "epoch": 2.1674433299107494, |
| "grad_norm": 0.2198594709438925, |
| "learning_rate": 6.978185596973192e-06, |
| "loss": 0.1917, |
| "step": 6860 |
| }, |
| { |
| "epoch": 2.170602638022273, |
| "grad_norm": 0.21033108894938493, |
| "learning_rate": 6.968051384926634e-06, |
| "loss": 0.1938, |
| "step": 6870 |
| }, |
| { |
| "epoch": 2.173761946133797, |
| "grad_norm": 0.22864890990862674, |
| "learning_rate": 6.957907596661729e-06, |
| "loss": 0.1915, |
| "step": 6880 |
| }, |
| { |
| "epoch": 2.1769212542453205, |
| "grad_norm": 0.21930675227402627, |
| "learning_rate": 6.9477542815365025e-06, |
| "loss": 0.1934, |
| "step": 6890 |
| }, |
| { |
| "epoch": 2.180080562356844, |
| "grad_norm": 0.2079046170539124, |
| "learning_rate": 6.937591488955335e-06, |
| "loss": 0.1913, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.1832398704683675, |
| "grad_norm": 0.227129021822185, |
| "learning_rate": 6.927419268368727e-06, |
| "loss": 0.1928, |
| "step": 6910 |
| }, |
| { |
| "epoch": 2.186399178579891, |
| "grad_norm": 0.21476017288096094, |
| "learning_rate": 6.917237669273047e-06, |
| "loss": 0.195, |
| "step": 6920 |
| }, |
| { |
| "epoch": 2.1895584866914146, |
| "grad_norm": 0.2101092960463205, |
| "learning_rate": 6.907046741210308e-06, |
| "loss": 0.1929, |
| "step": 6930 |
| }, |
| { |
| "epoch": 2.192717794802938, |
| "grad_norm": 0.21529278522230835, |
| "learning_rate": 6.8968465337679056e-06, |
| "loss": 0.1932, |
| "step": 6940 |
| }, |
| { |
| "epoch": 2.1958771029144617, |
| "grad_norm": 0.22276990935035607, |
| "learning_rate": 6.886637096578395e-06, |
| "loss": 0.1932, |
| "step": 6950 |
| }, |
| { |
| "epoch": 2.1990364110259852, |
| "grad_norm": 0.22806514291674243, |
| "learning_rate": 6.876418479319238e-06, |
| "loss": 0.1938, |
| "step": 6960 |
| }, |
| { |
| "epoch": 2.202195719137509, |
| "grad_norm": 0.20310603052055423, |
| "learning_rate": 6.866190731712566e-06, |
| "loss": 0.1945, |
| "step": 6970 |
| }, |
| { |
| "epoch": 2.2053550272490323, |
| "grad_norm": 0.2000583112376738, |
| "learning_rate": 6.8559539035249405e-06, |
| "loss": 0.1906, |
| "step": 6980 |
| }, |
| { |
| "epoch": 2.208514335360556, |
| "grad_norm": 0.21643622482829, |
| "learning_rate": 6.8457080445671e-06, |
| "loss": 0.1928, |
| "step": 6990 |
| }, |
| { |
| "epoch": 2.2116736434720794, |
| "grad_norm": 0.21135933665048376, |
| "learning_rate": 6.835453204693733e-06, |
| "loss": 0.1927, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.2148329515836034, |
| "grad_norm": 0.21880301226347054, |
| "learning_rate": 6.825189433803223e-06, |
| "loss": 0.1912, |
| "step": 7010 |
| }, |
| { |
| "epoch": 2.217992259695127, |
| "grad_norm": 0.21873208245413148, |
| "learning_rate": 6.814916781837413e-06, |
| "loss": 0.1922, |
| "step": 7020 |
| }, |
| { |
| "epoch": 2.2211515678066505, |
| "grad_norm": 0.19919301034094944, |
| "learning_rate": 6.804635298781358e-06, |
| "loss": 0.1914, |
| "step": 7030 |
| }, |
| { |
| "epoch": 2.224310875918174, |
| "grad_norm": 0.21709044589388574, |
| "learning_rate": 6.7943450346630845e-06, |
| "loss": 0.1949, |
| "step": 7040 |
| }, |
| { |
| "epoch": 2.2274701840296975, |
| "grad_norm": 0.19238634340689642, |
| "learning_rate": 6.784046039553347e-06, |
| "loss": 0.1926, |
| "step": 7050 |
| }, |
| { |
| "epoch": 2.230629492141221, |
| "grad_norm": 0.20786949384009987, |
| "learning_rate": 6.7737383635653805e-06, |
| "loss": 0.191, |
| "step": 7060 |
| }, |
| { |
| "epoch": 2.2337888002527446, |
| "grad_norm": 0.19859606904892732, |
| "learning_rate": 6.763422056854666e-06, |
| "loss": 0.1921, |
| "step": 7070 |
| }, |
| { |
| "epoch": 2.236948108364268, |
| "grad_norm": 0.22013791343337974, |
| "learning_rate": 6.753097169618672e-06, |
| "loss": 0.1892, |
| "step": 7080 |
| }, |
| { |
| "epoch": 2.2401074164757917, |
| "grad_norm": 0.22775949599854897, |
| "learning_rate": 6.742763752096625e-06, |
| "loss": 0.1924, |
| "step": 7090 |
| }, |
| { |
| "epoch": 2.2432667245873152, |
| "grad_norm": 0.2128328590833451, |
| "learning_rate": 6.732421854569254e-06, |
| "loss": 0.191, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.246426032698839, |
| "grad_norm": 0.21644470882444383, |
| "learning_rate": 6.722071527358557e-06, |
| "loss": 0.1935, |
| "step": 7110 |
| }, |
| { |
| "epoch": 2.2495853408103628, |
| "grad_norm": 0.21312175511661602, |
| "learning_rate": 6.7117128208275384e-06, |
| "loss": 0.1931, |
| "step": 7120 |
| }, |
| { |
| "epoch": 2.252744648921886, |
| "grad_norm": 0.22730243091149457, |
| "learning_rate": 6.701345785379987e-06, |
| "loss": 0.1922, |
| "step": 7130 |
| }, |
| { |
| "epoch": 2.25590395703341, |
| "grad_norm": 0.23578039614437363, |
| "learning_rate": 6.69097047146021e-06, |
| "loss": 0.1911, |
| "step": 7140 |
| }, |
| { |
| "epoch": 2.2590632651449334, |
| "grad_norm": 0.21392963590443645, |
| "learning_rate": 6.6805869295528e-06, |
| "loss": 0.1916, |
| "step": 7150 |
| }, |
| { |
| "epoch": 2.262222573256457, |
| "grad_norm": 0.21695056856445175, |
| "learning_rate": 6.6701952101823885e-06, |
| "loss": 0.1906, |
| "step": 7160 |
| }, |
| { |
| "epoch": 2.2653818813679805, |
| "grad_norm": 0.21461312040450123, |
| "learning_rate": 6.659795363913389e-06, |
| "loss": 0.1902, |
| "step": 7170 |
| }, |
| { |
| "epoch": 2.268541189479504, |
| "grad_norm": 0.2285522952212848, |
| "learning_rate": 6.649387441349767e-06, |
| "loss": 0.1903, |
| "step": 7180 |
| }, |
| { |
| "epoch": 2.2717004975910275, |
| "grad_norm": 0.2005700632922379, |
| "learning_rate": 6.6389714931347825e-06, |
| "loss": 0.1908, |
| "step": 7190 |
| }, |
| { |
| "epoch": 2.274859805702551, |
| "grad_norm": 0.22483814706686375, |
| "learning_rate": 6.628547569950748e-06, |
| "loss": 0.1943, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.2780191138140746, |
| "grad_norm": 0.21339185695046756, |
| "learning_rate": 6.61811572251878e-06, |
| "loss": 0.1923, |
| "step": 7210 |
| }, |
| { |
| "epoch": 2.281178421925598, |
| "grad_norm": 0.22013090536921387, |
| "learning_rate": 6.607676001598553e-06, |
| "loss": 0.1931, |
| "step": 7220 |
| }, |
| { |
| "epoch": 2.2843377300371217, |
| "grad_norm": 0.21150686936631666, |
| "learning_rate": 6.597228457988053e-06, |
| "loss": 0.1933, |
| "step": 7230 |
| }, |
| { |
| "epoch": 2.2874970381486452, |
| "grad_norm": 0.22030620455805627, |
| "learning_rate": 6.58677314252333e-06, |
| "loss": 0.1913, |
| "step": 7240 |
| }, |
| { |
| "epoch": 2.2906563462601692, |
| "grad_norm": 0.20354686364957472, |
| "learning_rate": 6.576310106078255e-06, |
| "loss": 0.1935, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.2938156543716928, |
| "grad_norm": 0.2266197363553553, |
| "learning_rate": 6.565839399564258e-06, |
| "loss": 0.1943, |
| "step": 7260 |
| }, |
| { |
| "epoch": 2.2969749624832163, |
| "grad_norm": 0.21248852160067305, |
| "learning_rate": 6.555361073930098e-06, |
| "loss": 0.1923, |
| "step": 7270 |
| }, |
| { |
| "epoch": 2.30013427059474, |
| "grad_norm": 0.20476133265911461, |
| "learning_rate": 6.544875180161605e-06, |
| "loss": 0.1908, |
| "step": 7280 |
| }, |
| { |
| "epoch": 2.3032935787062634, |
| "grad_norm": 0.20464174752072192, |
| "learning_rate": 6.534381769281437e-06, |
| "loss": 0.1905, |
| "step": 7290 |
| }, |
| { |
| "epoch": 2.306452886817787, |
| "grad_norm": 0.21832471477385795, |
| "learning_rate": 6.523880892348824e-06, |
| "loss": 0.194, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.3096121949293105, |
| "grad_norm": 0.19969512810776804, |
| "learning_rate": 6.513372600459329e-06, |
| "loss": 0.1914, |
| "step": 7310 |
| }, |
| { |
| "epoch": 2.312771503040834, |
| "grad_norm": 0.21518488246695772, |
| "learning_rate": 6.502856944744593e-06, |
| "loss": 0.1937, |
| "step": 7320 |
| }, |
| { |
| "epoch": 2.3159308111523575, |
| "grad_norm": 0.22335845799273413, |
| "learning_rate": 6.49233397637209e-06, |
| "loss": 0.1909, |
| "step": 7330 |
| }, |
| { |
| "epoch": 2.319090119263881, |
| "grad_norm": 0.2258181355415704, |
| "learning_rate": 6.48180374654487e-06, |
| "loss": 0.1918, |
| "step": 7340 |
| }, |
| { |
| "epoch": 2.3222494273754046, |
| "grad_norm": 0.22062705120624707, |
| "learning_rate": 6.471266306501325e-06, |
| "loss": 0.1925, |
| "step": 7350 |
| }, |
| { |
| "epoch": 2.325408735486928, |
| "grad_norm": 0.21451058313158836, |
| "learning_rate": 6.4607217075149265e-06, |
| "loss": 0.1909, |
| "step": 7360 |
| }, |
| { |
| "epoch": 2.3285680435984517, |
| "grad_norm": 0.20119666338740025, |
| "learning_rate": 6.450170000893978e-06, |
| "loss": 0.1912, |
| "step": 7370 |
| }, |
| { |
| "epoch": 2.3317273517099757, |
| "grad_norm": 0.20548278813927204, |
| "learning_rate": 6.439611237981373e-06, |
| "loss": 0.1903, |
| "step": 7380 |
| }, |
| { |
| "epoch": 2.3348866598214992, |
| "grad_norm": 0.2220285180002872, |
| "learning_rate": 6.429045470154333e-06, |
| "loss": 0.1922, |
| "step": 7390 |
| }, |
| { |
| "epoch": 2.3380459679330228, |
| "grad_norm": 0.20989770488757611, |
| "learning_rate": 6.418472748824172e-06, |
| "loss": 0.1905, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.3412052760445463, |
| "grad_norm": 0.21011715599104358, |
| "learning_rate": 6.407893125436031e-06, |
| "loss": 0.1918, |
| "step": 7410 |
| }, |
| { |
| "epoch": 2.34436458415607, |
| "grad_norm": 0.19838344804989674, |
| "learning_rate": 6.397306651468641e-06, |
| "loss": 0.1909, |
| "step": 7420 |
| }, |
| { |
| "epoch": 2.3475238922675934, |
| "grad_norm": 0.2169687246351887, |
| "learning_rate": 6.386713378434064e-06, |
| "loss": 0.1927, |
| "step": 7430 |
| }, |
| { |
| "epoch": 2.350683200379117, |
| "grad_norm": 0.204594134231941, |
| "learning_rate": 6.376113357877445e-06, |
| "loss": 0.1925, |
| "step": 7440 |
| }, |
| { |
| "epoch": 2.3538425084906405, |
| "grad_norm": 0.20966117593645678, |
| "learning_rate": 6.365506641376762e-06, |
| "loss": 0.1897, |
| "step": 7450 |
| }, |
| { |
| "epoch": 2.357001816602164, |
| "grad_norm": 0.20527775997224504, |
| "learning_rate": 6.354893280542576e-06, |
| "loss": 0.1929, |
| "step": 7460 |
| }, |
| { |
| "epoch": 2.3601611247136876, |
| "grad_norm": 0.20065093914673782, |
| "learning_rate": 6.344273327017778e-06, |
| "loss": 0.193, |
| "step": 7470 |
| }, |
| { |
| "epoch": 2.363320432825211, |
| "grad_norm": 0.22557801837324515, |
| "learning_rate": 6.333646832477334e-06, |
| "loss": 0.1898, |
| "step": 7480 |
| }, |
| { |
| "epoch": 2.366479740936735, |
| "grad_norm": 0.21882992475206528, |
| "learning_rate": 6.32301384862804e-06, |
| "loss": 0.1941, |
| "step": 7490 |
| }, |
| { |
| "epoch": 2.3696390490482586, |
| "grad_norm": 0.21331149519342535, |
| "learning_rate": 6.31237442720827e-06, |
| "loss": 0.1928, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.372798357159782, |
| "grad_norm": 0.2135806830134628, |
| "learning_rate": 6.301728619987722e-06, |
| "loss": 0.1912, |
| "step": 7510 |
| }, |
| { |
| "epoch": 2.3759576652713057, |
| "grad_norm": 0.20992047195155106, |
| "learning_rate": 6.29107647876716e-06, |
| "loss": 0.1924, |
| "step": 7520 |
| }, |
| { |
| "epoch": 2.3791169733828292, |
| "grad_norm": 0.22347906358520409, |
| "learning_rate": 6.280418055378175e-06, |
| "loss": 0.1929, |
| "step": 7530 |
| }, |
| { |
| "epoch": 2.3822762814943528, |
| "grad_norm": 0.20738356404474784, |
| "learning_rate": 6.269753401682924e-06, |
| "loss": 0.1921, |
| "step": 7540 |
| }, |
| { |
| "epoch": 2.3854355896058763, |
| "grad_norm": 0.21987439925453983, |
| "learning_rate": 6.25908256957388e-06, |
| "loss": 0.1914, |
| "step": 7550 |
| }, |
| { |
| "epoch": 2.3885948977174, |
| "grad_norm": 0.20180229410257866, |
| "learning_rate": 6.248405610973579e-06, |
| "loss": 0.1915, |
| "step": 7560 |
| }, |
| { |
| "epoch": 2.3917542058289234, |
| "grad_norm": 0.204547582458948, |
| "learning_rate": 6.237722577834366e-06, |
| "loss": 0.1926, |
| "step": 7570 |
| }, |
| { |
| "epoch": 2.394913513940447, |
| "grad_norm": 0.20792182149152452, |
| "learning_rate": 6.227033522138145e-06, |
| "loss": 0.1933, |
| "step": 7580 |
| }, |
| { |
| "epoch": 2.3980728220519705, |
| "grad_norm": 0.20943317325322225, |
| "learning_rate": 6.216338495896125e-06, |
| "loss": 0.192, |
| "step": 7590 |
| }, |
| { |
| "epoch": 2.401232130163494, |
| "grad_norm": 0.2184828315646208, |
| "learning_rate": 6.205637551148567e-06, |
| "loss": 0.1931, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.4043914382750176, |
| "grad_norm": 0.22863771124698407, |
| "learning_rate": 6.194930739964529e-06, |
| "loss": 0.1928, |
| "step": 7610 |
| }, |
| { |
| "epoch": 2.4075507463865415, |
| "grad_norm": 0.22337051398699578, |
| "learning_rate": 6.1842181144416145e-06, |
| "loss": 0.1924, |
| "step": 7620 |
| }, |
| { |
| "epoch": 2.410710054498065, |
| "grad_norm": 0.21034627944404075, |
| "learning_rate": 6.17349972670572e-06, |
| "loss": 0.1916, |
| "step": 7630 |
| }, |
| { |
| "epoch": 2.4138693626095886, |
| "grad_norm": 0.20200815442203482, |
| "learning_rate": 6.162775628910781e-06, |
| "loss": 0.1934, |
| "step": 7640 |
| }, |
| { |
| "epoch": 2.417028670721112, |
| "grad_norm": 0.20113233320753685, |
| "learning_rate": 6.152045873238512e-06, |
| "loss": 0.1915, |
| "step": 7650 |
| }, |
| { |
| "epoch": 2.4201879788326357, |
| "grad_norm": 0.2148019344588209, |
| "learning_rate": 6.141310511898162e-06, |
| "loss": 0.1901, |
| "step": 7660 |
| }, |
| { |
| "epoch": 2.4233472869441592, |
| "grad_norm": 0.2156955634575946, |
| "learning_rate": 6.130569597126257e-06, |
| "loss": 0.1896, |
| "step": 7670 |
| }, |
| { |
| "epoch": 2.426506595055683, |
| "grad_norm": 0.22700196837396544, |
| "learning_rate": 6.119823181186342e-06, |
| "loss": 0.1923, |
| "step": 7680 |
| }, |
| { |
| "epoch": 2.4296659031672063, |
| "grad_norm": 0.20832014160995668, |
| "learning_rate": 6.109071316368732e-06, |
| "loss": 0.1943, |
| "step": 7690 |
| }, |
| { |
| "epoch": 2.43282521127873, |
| "grad_norm": 0.22039569297008865, |
| "learning_rate": 6.0983140549902544e-06, |
| "loss": 0.1918, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.4359845193902534, |
| "grad_norm": 0.2032368232732068, |
| "learning_rate": 6.087551449393996e-06, |
| "loss": 0.1908, |
| "step": 7710 |
| }, |
| { |
| "epoch": 2.439143827501777, |
| "grad_norm": 0.21501063236631873, |
| "learning_rate": 6.0767835519490455e-06, |
| "loss": 0.193, |
| "step": 7720 |
| }, |
| { |
| "epoch": 2.442303135613301, |
| "grad_norm": 0.21020124149593053, |
| "learning_rate": 6.066010415050246e-06, |
| "loss": 0.1912, |
| "step": 7730 |
| }, |
| { |
| "epoch": 2.445462443724824, |
| "grad_norm": 0.21241622539470262, |
| "learning_rate": 6.0552320911179295e-06, |
| "loss": 0.1909, |
| "step": 7740 |
| }, |
| { |
| "epoch": 2.448621751836348, |
| "grad_norm": 0.21470252522546168, |
| "learning_rate": 6.04444863259767e-06, |
| "loss": 0.1928, |
| "step": 7750 |
| }, |
| { |
| "epoch": 2.4517810599478715, |
| "grad_norm": 0.21167973032496565, |
| "learning_rate": 6.033660091960025e-06, |
| "loss": 0.1936, |
| "step": 7760 |
| }, |
| { |
| "epoch": 2.454940368059395, |
| "grad_norm": 0.2051994018828898, |
| "learning_rate": 6.02286652170028e-06, |
| "loss": 0.1938, |
| "step": 7770 |
| }, |
| { |
| "epoch": 2.4580996761709186, |
| "grad_norm": 0.20457172842704202, |
| "learning_rate": 6.0120679743381945e-06, |
| "loss": 0.19, |
| "step": 7780 |
| }, |
| { |
| "epoch": 2.461258984282442, |
| "grad_norm": 0.2188284827007198, |
| "learning_rate": 6.001264502417749e-06, |
| "loss": 0.1923, |
| "step": 7790 |
| }, |
| { |
| "epoch": 2.4644182923939657, |
| "grad_norm": 0.2196693674996576, |
| "learning_rate": 5.990456158506879e-06, |
| "loss": 0.1905, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.4675776005054892, |
| "grad_norm": 0.206778112224436, |
| "learning_rate": 5.979642995197231e-06, |
| "loss": 0.1932, |
| "step": 7810 |
| }, |
| { |
| "epoch": 2.470736908617013, |
| "grad_norm": 0.21023136041598797, |
| "learning_rate": 5.968825065103904e-06, |
| "loss": 0.1914, |
| "step": 7820 |
| }, |
| { |
| "epoch": 2.4738962167285363, |
| "grad_norm": 0.20965600357426922, |
| "learning_rate": 5.958002420865184e-06, |
| "loss": 0.1908, |
| "step": 7830 |
| }, |
| { |
| "epoch": 2.47705552484006, |
| "grad_norm": 0.20939819430473985, |
| "learning_rate": 5.947175115142303e-06, |
| "loss": 0.1923, |
| "step": 7840 |
| }, |
| { |
| "epoch": 2.4802148329515834, |
| "grad_norm": 0.21000576536235138, |
| "learning_rate": 5.936343200619171e-06, |
| "loss": 0.1906, |
| "step": 7850 |
| }, |
| { |
| "epoch": 2.4833741410631074, |
| "grad_norm": 0.21638448903532395, |
| "learning_rate": 5.925506730002125e-06, |
| "loss": 0.1922, |
| "step": 7860 |
| }, |
| { |
| "epoch": 2.486533449174631, |
| "grad_norm": 0.21066924946782678, |
| "learning_rate": 5.914665756019672e-06, |
| "loss": 0.1926, |
| "step": 7870 |
| }, |
| { |
| "epoch": 2.4896927572861545, |
| "grad_norm": 0.21573344183841558, |
| "learning_rate": 5.903820331422228e-06, |
| "loss": 0.1929, |
| "step": 7880 |
| }, |
| { |
| "epoch": 2.492852065397678, |
| "grad_norm": 0.20934900894495664, |
| "learning_rate": 5.8929705089818665e-06, |
| "loss": 0.1915, |
| "step": 7890 |
| }, |
| { |
| "epoch": 2.4960113735092015, |
| "grad_norm": 0.19194977706940777, |
| "learning_rate": 5.882116341492063e-06, |
| "loss": 0.1918, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.499170681620725, |
| "grad_norm": 0.22582333577460095, |
| "learning_rate": 5.8712578817674356e-06, |
| "loss": 0.1909, |
| "step": 7910 |
| }, |
| { |
| "epoch": 2.5023299897322486, |
| "grad_norm": 0.2083520216392237, |
| "learning_rate": 5.860395182643481e-06, |
| "loss": 0.1915, |
| "step": 7920 |
| }, |
| { |
| "epoch": 2.505489297843772, |
| "grad_norm": 0.21101939233964928, |
| "learning_rate": 5.84952829697633e-06, |
| "loss": 0.1907, |
| "step": 7930 |
| }, |
| { |
| "epoch": 2.5086486059552957, |
| "grad_norm": 0.21143622985502697, |
| "learning_rate": 5.838657277642484e-06, |
| "loss": 0.1935, |
| "step": 7940 |
| }, |
| { |
| "epoch": 2.5118079140668192, |
| "grad_norm": 0.20175953224841772, |
| "learning_rate": 5.8277821775385575e-06, |
| "loss": 0.1924, |
| "step": 7950 |
| }, |
| { |
| "epoch": 2.514967222178343, |
| "grad_norm": 0.21068052700964154, |
| "learning_rate": 5.816903049581021e-06, |
| "loss": 0.1937, |
| "step": 7960 |
| }, |
| { |
| "epoch": 2.5181265302898668, |
| "grad_norm": 0.21882832311876035, |
| "learning_rate": 5.806019946705942e-06, |
| "loss": 0.191, |
| "step": 7970 |
| }, |
| { |
| "epoch": 2.52128583840139, |
| "grad_norm": 0.21291172763480593, |
| "learning_rate": 5.795132921868732e-06, |
| "loss": 0.1909, |
| "step": 7980 |
| }, |
| { |
| "epoch": 2.524445146512914, |
| "grad_norm": 0.2062457751313165, |
| "learning_rate": 5.784242028043886e-06, |
| "loss": 0.1913, |
| "step": 7990 |
| }, |
| { |
| "epoch": 2.5276044546244374, |
| "grad_norm": 0.19898272324165425, |
| "learning_rate": 5.773347318224726e-06, |
| "loss": 0.1926, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.530763762735961, |
| "grad_norm": 0.21382734065324233, |
| "learning_rate": 5.762448845423136e-06, |
| "loss": 0.1897, |
| "step": 8010 |
| }, |
| { |
| "epoch": 2.5339230708474845, |
| "grad_norm": 0.19860488219214512, |
| "learning_rate": 5.751546662669319e-06, |
| "loss": 0.1916, |
| "step": 8020 |
| }, |
| { |
| "epoch": 2.537082378959008, |
| "grad_norm": 0.20598643966772848, |
| "learning_rate": 5.74064082301152e-06, |
| "loss": 0.1889, |
| "step": 8030 |
| }, |
| { |
| "epoch": 2.5402416870705316, |
| "grad_norm": 0.20481756196092588, |
| "learning_rate": 5.729731379515787e-06, |
| "loss": 0.1916, |
| "step": 8040 |
| }, |
| { |
| "epoch": 2.543400995182055, |
| "grad_norm": 0.21953218134084496, |
| "learning_rate": 5.718818385265701e-06, |
| "loss": 0.1921, |
| "step": 8050 |
| }, |
| { |
| "epoch": 2.5465603032935786, |
| "grad_norm": 0.21169625628604868, |
| "learning_rate": 5.707901893362116e-06, |
| "loss": 0.1925, |
| "step": 8060 |
| }, |
| { |
| "epoch": 2.549719611405102, |
| "grad_norm": 0.20604257642179108, |
| "learning_rate": 5.696981956922909e-06, |
| "loss": 0.1906, |
| "step": 8070 |
| }, |
| { |
| "epoch": 2.5528789195166257, |
| "grad_norm": 0.2041617688415921, |
| "learning_rate": 5.686058629082718e-06, |
| "loss": 0.191, |
| "step": 8080 |
| }, |
| { |
| "epoch": 2.5560382276281493, |
| "grad_norm": 0.21493038957804925, |
| "learning_rate": 5.6751319629926834e-06, |
| "loss": 0.1903, |
| "step": 8090 |
| }, |
| { |
| "epoch": 2.5591975357396732, |
| "grad_norm": 0.22786525193461765, |
| "learning_rate": 5.664202011820183e-06, |
| "loss": 0.1907, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.5623568438511963, |
| "grad_norm": 0.20645959253391757, |
| "learning_rate": 5.653268828748588e-06, |
| "loss": 0.1901, |
| "step": 8110 |
| }, |
| { |
| "epoch": 2.5655161519627203, |
| "grad_norm": 0.19358622503604414, |
| "learning_rate": 5.642332466976989e-06, |
| "loss": 0.1878, |
| "step": 8120 |
| }, |
| { |
| "epoch": 2.568675460074244, |
| "grad_norm": 0.19969978459877627, |
| "learning_rate": 5.631392979719945e-06, |
| "loss": 0.1903, |
| "step": 8130 |
| }, |
| { |
| "epoch": 2.5718347681857674, |
| "grad_norm": 0.2143548911374617, |
| "learning_rate": 5.620450420207227e-06, |
| "loss": 0.1911, |
| "step": 8140 |
| }, |
| { |
| "epoch": 2.574994076297291, |
| "grad_norm": 0.21122139807224502, |
| "learning_rate": 5.609504841683551e-06, |
| "loss": 0.1904, |
| "step": 8150 |
| }, |
| { |
| "epoch": 2.5781533844088145, |
| "grad_norm": 0.21283424203204657, |
| "learning_rate": 5.598556297408322e-06, |
| "loss": 0.1929, |
| "step": 8160 |
| }, |
| { |
| "epoch": 2.581312692520338, |
| "grad_norm": 0.20416368302758592, |
| "learning_rate": 5.587604840655379e-06, |
| "loss": 0.1924, |
| "step": 8170 |
| }, |
| { |
| "epoch": 2.5844720006318616, |
| "grad_norm": 0.20184025843385978, |
| "learning_rate": 5.576650524712734e-06, |
| "loss": 0.1912, |
| "step": 8180 |
| }, |
| { |
| "epoch": 2.587631308743385, |
| "grad_norm": 0.20798749173073353, |
| "learning_rate": 5.565693402882306e-06, |
| "loss": 0.1923, |
| "step": 8190 |
| }, |
| { |
| "epoch": 2.5907906168549086, |
| "grad_norm": 0.2110396741272342, |
| "learning_rate": 5.554733528479672e-06, |
| "loss": 0.1901, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.5939499249664326, |
| "grad_norm": 0.2166668205307208, |
| "learning_rate": 5.5437709548337985e-06, |
| "loss": 0.191, |
| "step": 8210 |
| }, |
| { |
| "epoch": 2.5971092330779557, |
| "grad_norm": 0.22301181516302854, |
| "learning_rate": 5.53280573528679e-06, |
| "loss": 0.1911, |
| "step": 8220 |
| }, |
| { |
| "epoch": 2.6002685411894797, |
| "grad_norm": 0.20791087766916325, |
| "learning_rate": 5.521837923193621e-06, |
| "loss": 0.1889, |
| "step": 8230 |
| }, |
| { |
| "epoch": 2.6034278493010032, |
| "grad_norm": 0.21867859844216495, |
| "learning_rate": 5.510867571921887e-06, |
| "loss": 0.19, |
| "step": 8240 |
| }, |
| { |
| "epoch": 2.606587157412527, |
| "grad_norm": 0.20256198649149781, |
| "learning_rate": 5.499894734851533e-06, |
| "loss": 0.1908, |
| "step": 8250 |
| }, |
| { |
| "epoch": 2.6097464655240503, |
| "grad_norm": 0.2118493538585681, |
| "learning_rate": 5.488919465374601e-06, |
| "loss": 0.1898, |
| "step": 8260 |
| }, |
| { |
| "epoch": 2.612905773635574, |
| "grad_norm": 0.20908873607701223, |
| "learning_rate": 5.477941816894973e-06, |
| "loss": 0.1904, |
| "step": 8270 |
| }, |
| { |
| "epoch": 2.6160650817470974, |
| "grad_norm": 0.18994734431648216, |
| "learning_rate": 5.4669618428281e-06, |
| "loss": 0.1895, |
| "step": 8280 |
| }, |
| { |
| "epoch": 2.619224389858621, |
| "grad_norm": 0.21464906838442485, |
| "learning_rate": 5.455979596600752e-06, |
| "loss": 0.1906, |
| "step": 8290 |
| }, |
| { |
| "epoch": 2.6223836979701445, |
| "grad_norm": 0.20545088012285806, |
| "learning_rate": 5.444995131650757e-06, |
| "loss": 0.1916, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.625543006081668, |
| "grad_norm": 0.2308591130429511, |
| "learning_rate": 5.434008501426739e-06, |
| "loss": 0.1915, |
| "step": 8310 |
| }, |
| { |
| "epoch": 2.6287023141931916, |
| "grad_norm": 0.19533870441750978, |
| "learning_rate": 5.423019759387851e-06, |
| "loss": 0.1905, |
| "step": 8320 |
| }, |
| { |
| "epoch": 2.631861622304715, |
| "grad_norm": 0.21360189157598528, |
| "learning_rate": 5.41202895900353e-06, |
| "loss": 0.1891, |
| "step": 8330 |
| }, |
| { |
| "epoch": 2.635020930416239, |
| "grad_norm": 0.2086177679970007, |
| "learning_rate": 5.401036153753224e-06, |
| "loss": 0.1894, |
| "step": 8340 |
| }, |
| { |
| "epoch": 2.638180238527762, |
| "grad_norm": 0.20511520788720256, |
| "learning_rate": 5.390041397126139e-06, |
| "loss": 0.191, |
| "step": 8350 |
| }, |
| { |
| "epoch": 2.641339546639286, |
| "grad_norm": 0.20744339959234745, |
| "learning_rate": 5.379044742620975e-06, |
| "loss": 0.1906, |
| "step": 8360 |
| }, |
| { |
| "epoch": 2.6444988547508097, |
| "grad_norm": 0.21002258208476907, |
| "learning_rate": 5.368046243745664e-06, |
| "loss": 0.19, |
| "step": 8370 |
| }, |
| { |
| "epoch": 2.6476581628623332, |
| "grad_norm": 0.20045395931004378, |
| "learning_rate": 5.357045954017117e-06, |
| "loss": 0.1918, |
| "step": 8380 |
| }, |
| { |
| "epoch": 2.650817470973857, |
| "grad_norm": 0.20206701855858772, |
| "learning_rate": 5.346043926960955e-06, |
| "loss": 0.1914, |
| "step": 8390 |
| }, |
| { |
| "epoch": 2.6539767790853803, |
| "grad_norm": 0.21096628622208463, |
| "learning_rate": 5.335040216111259e-06, |
| "loss": 0.192, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.657136087196904, |
| "grad_norm": 0.20546853070131513, |
| "learning_rate": 5.324034875010293e-06, |
| "loss": 0.1913, |
| "step": 8410 |
| }, |
| { |
| "epoch": 2.6602953953084274, |
| "grad_norm": 0.21143968803860907, |
| "learning_rate": 5.313027957208262e-06, |
| "loss": 0.19, |
| "step": 8420 |
| }, |
| { |
| "epoch": 2.663454703419951, |
| "grad_norm": 0.20813089241185626, |
| "learning_rate": 5.30201951626304e-06, |
| "loss": 0.1908, |
| "step": 8430 |
| }, |
| { |
| "epoch": 2.6666140115314745, |
| "grad_norm": 0.2065605941239096, |
| "learning_rate": 5.291009605739912e-06, |
| "loss": 0.1914, |
| "step": 8440 |
| }, |
| { |
| "epoch": 2.669773319642998, |
| "grad_norm": 0.211794362752723, |
| "learning_rate": 5.279998279211315e-06, |
| "loss": 0.194, |
| "step": 8450 |
| }, |
| { |
| "epoch": 2.6729326277545216, |
| "grad_norm": 0.21353505062189707, |
| "learning_rate": 5.2689855902565725e-06, |
| "loss": 0.1907, |
| "step": 8460 |
| }, |
| { |
| "epoch": 2.6760919358660455, |
| "grad_norm": 0.20548536484778626, |
| "learning_rate": 5.257971592461643e-06, |
| "loss": 0.1889, |
| "step": 8470 |
| }, |
| { |
| "epoch": 2.6792512439775686, |
| "grad_norm": 0.20132489673742388, |
| "learning_rate": 5.2469563394188485e-06, |
| "loss": 0.1913, |
| "step": 8480 |
| }, |
| { |
| "epoch": 2.6824105520890926, |
| "grad_norm": 0.2183068296850621, |
| "learning_rate": 5.235939884726624e-06, |
| "loss": 0.1899, |
| "step": 8490 |
| }, |
| { |
| "epoch": 2.685569860200616, |
| "grad_norm": 0.21831688267562407, |
| "learning_rate": 5.224922281989245e-06, |
| "loss": 0.1885, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.6887291683121397, |
| "grad_norm": 0.20042363588520107, |
| "learning_rate": 5.213903584816578e-06, |
| "loss": 0.1919, |
| "step": 8510 |
| }, |
| { |
| "epoch": 2.6918884764236632, |
| "grad_norm": 0.19948817447604472, |
| "learning_rate": 5.202883846823816e-06, |
| "loss": 0.1869, |
| "step": 8520 |
| }, |
| { |
| "epoch": 2.695047784535187, |
| "grad_norm": 0.20763152988351818, |
| "learning_rate": 5.1918631216312095e-06, |
| "loss": 0.1892, |
| "step": 8530 |
| }, |
| { |
| "epoch": 2.6982070926467103, |
| "grad_norm": 0.21738471086652061, |
| "learning_rate": 5.1808414628638206e-06, |
| "loss": 0.1904, |
| "step": 8540 |
| }, |
| { |
| "epoch": 2.701366400758234, |
| "grad_norm": 0.21100460968688434, |
| "learning_rate": 5.16981892415125e-06, |
| "loss": 0.1899, |
| "step": 8550 |
| }, |
| { |
| "epoch": 2.7045257088697574, |
| "grad_norm": 0.21418226702194354, |
| "learning_rate": 5.158795559127379e-06, |
| "loss": 0.191, |
| "step": 8560 |
| }, |
| { |
| "epoch": 2.707685016981281, |
| "grad_norm": 0.20136963314709463, |
| "learning_rate": 5.147771421430112e-06, |
| "loss": 0.1903, |
| "step": 8570 |
| }, |
| { |
| "epoch": 2.710844325092805, |
| "grad_norm": 0.19436149226210073, |
| "learning_rate": 5.136746564701113e-06, |
| "loss": 0.1921, |
| "step": 8580 |
| }, |
| { |
| "epoch": 2.714003633204328, |
| "grad_norm": 0.2140972731026424, |
| "learning_rate": 5.125721042585542e-06, |
| "loss": 0.1896, |
| "step": 8590 |
| }, |
| { |
| "epoch": 2.717162941315852, |
| "grad_norm": 0.20787084627216532, |
| "learning_rate": 5.114694908731801e-06, |
| "loss": 0.1915, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.7203222494273755, |
| "grad_norm": 0.21044073918475403, |
| "learning_rate": 5.103668216791266e-06, |
| "loss": 0.1924, |
| "step": 8610 |
| }, |
| { |
| "epoch": 2.723481557538899, |
| "grad_norm": 0.20735197165221705, |
| "learning_rate": 5.092641020418026e-06, |
| "loss": 0.1886, |
| "step": 8620 |
| }, |
| { |
| "epoch": 2.7266408656504226, |
| "grad_norm": 0.206930807103157, |
| "learning_rate": 5.0816133732686305e-06, |
| "loss": 0.1898, |
| "step": 8630 |
| }, |
| { |
| "epoch": 2.729800173761946, |
| "grad_norm": 0.19205839670275318, |
| "learning_rate": 5.070585329001819e-06, |
| "loss": 0.1908, |
| "step": 8640 |
| }, |
| { |
| "epoch": 2.7329594818734697, |
| "grad_norm": 0.212328301487025, |
| "learning_rate": 5.059556941278261e-06, |
| "loss": 0.1903, |
| "step": 8650 |
| }, |
| { |
| "epoch": 2.7361187899849932, |
| "grad_norm": 0.2099153942052239, |
| "learning_rate": 5.048528263760301e-06, |
| "loss": 0.1909, |
| "step": 8660 |
| }, |
| { |
| "epoch": 2.739278098096517, |
| "grad_norm": 0.2137298602899187, |
| "learning_rate": 5.037499350111693e-06, |
| "loss": 0.1886, |
| "step": 8670 |
| }, |
| { |
| "epoch": 2.7424374062080403, |
| "grad_norm": 0.20013893402808897, |
| "learning_rate": 5.026470253997339e-06, |
| "loss": 0.1918, |
| "step": 8680 |
| }, |
| { |
| "epoch": 2.745596714319564, |
| "grad_norm": 0.2029041595541199, |
| "learning_rate": 5.0154410290830295e-06, |
| "loss": 0.1896, |
| "step": 8690 |
| }, |
| { |
| "epoch": 2.7487560224310874, |
| "grad_norm": 0.21844048806518204, |
| "learning_rate": 5.004411729035179e-06, |
| "loss": 0.1903, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.7519153305426114, |
| "grad_norm": 0.20229493690485986, |
| "learning_rate": 4.9933824075205735e-06, |
| "loss": 0.1889, |
| "step": 8710 |
| }, |
| { |
| "epoch": 2.7550746386541345, |
| "grad_norm": 0.20057196812482364, |
| "learning_rate": 4.982353118206095e-06, |
| "loss": 0.1905, |
| "step": 8720 |
| }, |
| { |
| "epoch": 2.7582339467656585, |
| "grad_norm": 0.20127831749959146, |
| "learning_rate": 4.971323914758479e-06, |
| "loss": 0.192, |
| "step": 8730 |
| }, |
| { |
| "epoch": 2.761393254877182, |
| "grad_norm": 0.19694835136970584, |
| "learning_rate": 4.9602948508440365e-06, |
| "loss": 0.1899, |
| "step": 8740 |
| }, |
| { |
| "epoch": 2.7645525629887056, |
| "grad_norm": 0.20744913562422332, |
| "learning_rate": 4.949265980128398e-06, |
| "loss": 0.1903, |
| "step": 8750 |
| }, |
| { |
| "epoch": 2.767711871100229, |
| "grad_norm": 0.1938079327085531, |
| "learning_rate": 4.938237356276261e-06, |
| "loss": 0.1909, |
| "step": 8760 |
| }, |
| { |
| "epoch": 2.7708711792117526, |
| "grad_norm": 0.2007179212194115, |
| "learning_rate": 4.9272090329511136e-06, |
| "loss": 0.1897, |
| "step": 8770 |
| }, |
| { |
| "epoch": 2.774030487323276, |
| "grad_norm": 0.21505457205199155, |
| "learning_rate": 4.916181063814989e-06, |
| "loss": 0.1925, |
| "step": 8780 |
| }, |
| { |
| "epoch": 2.7771897954347997, |
| "grad_norm": 0.21435123984044802, |
| "learning_rate": 4.905153502528193e-06, |
| "loss": 0.1902, |
| "step": 8790 |
| }, |
| { |
| "epoch": 2.7803491035463233, |
| "grad_norm": 0.2061154947415264, |
| "learning_rate": 4.894126402749044e-06, |
| "loss": 0.1898, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.783508411657847, |
| "grad_norm": 0.2131667185188113, |
| "learning_rate": 4.883099818133624e-06, |
| "loss": 0.193, |
| "step": 8810 |
| }, |
| { |
| "epoch": 2.7866677197693708, |
| "grad_norm": 0.21301174700619682, |
| "learning_rate": 4.872073802335499e-06, |
| "loss": 0.1906, |
| "step": 8820 |
| }, |
| { |
| "epoch": 2.789827027880894, |
| "grad_norm": 0.2108477817285028, |
| "learning_rate": 4.86104840900547e-06, |
| "loss": 0.19, |
| "step": 8830 |
| }, |
| { |
| "epoch": 2.792986335992418, |
| "grad_norm": 0.202466254802374, |
| "learning_rate": 4.850023691791313e-06, |
| "loss": 0.1913, |
| "step": 8840 |
| }, |
| { |
| "epoch": 2.7961456441039414, |
| "grad_norm": 0.21383889045040108, |
| "learning_rate": 4.838999704337507e-06, |
| "loss": 0.1905, |
| "step": 8850 |
| }, |
| { |
| "epoch": 2.799304952215465, |
| "grad_norm": 0.22342476096804859, |
| "learning_rate": 4.82797650028499e-06, |
| "loss": 0.1929, |
| "step": 8860 |
| }, |
| { |
| "epoch": 2.8024642603269885, |
| "grad_norm": 0.2124173070769737, |
| "learning_rate": 4.816954133270879e-06, |
| "loss": 0.1902, |
| "step": 8870 |
| }, |
| { |
| "epoch": 2.805623568438512, |
| "grad_norm": 0.20879593574150696, |
| "learning_rate": 4.805932656928218e-06, |
| "loss": 0.1907, |
| "step": 8880 |
| }, |
| { |
| "epoch": 2.8087828765500356, |
| "grad_norm": 0.2009544133407569, |
| "learning_rate": 4.794912124885728e-06, |
| "loss": 0.1924, |
| "step": 8890 |
| }, |
| { |
| "epoch": 2.811942184661559, |
| "grad_norm": 0.2176494138664626, |
| "learning_rate": 4.78389259076752e-06, |
| "loss": 0.19, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.8151014927730826, |
| "grad_norm": 0.20657867775507024, |
| "learning_rate": 4.772874108192864e-06, |
| "loss": 0.1886, |
| "step": 8910 |
| }, |
| { |
| "epoch": 2.818260800884606, |
| "grad_norm": 0.19602872268511337, |
| "learning_rate": 4.761856730775902e-06, |
| "loss": 0.1901, |
| "step": 8920 |
| }, |
| { |
| "epoch": 2.8214201089961297, |
| "grad_norm": 0.21196238191751543, |
| "learning_rate": 4.750840512125403e-06, |
| "loss": 0.1883, |
| "step": 8930 |
| }, |
| { |
| "epoch": 2.8245794171076533, |
| "grad_norm": 0.22132755218766037, |
| "learning_rate": 4.7398255058445e-06, |
| "loss": 0.1884, |
| "step": 8940 |
| }, |
| { |
| "epoch": 2.8277387252191772, |
| "grad_norm": 0.21199745533152217, |
| "learning_rate": 4.72881176553042e-06, |
| "loss": 0.1893, |
| "step": 8950 |
| }, |
| { |
| "epoch": 2.8308980333307003, |
| "grad_norm": 0.20115752802009068, |
| "learning_rate": 4.717799344774241e-06, |
| "loss": 0.19, |
| "step": 8960 |
| }, |
| { |
| "epoch": 2.8340573414422243, |
| "grad_norm": 0.1971895330735378, |
| "learning_rate": 4.706788297160608e-06, |
| "loss": 0.1914, |
| "step": 8970 |
| }, |
| { |
| "epoch": 2.837216649553748, |
| "grad_norm": 0.22093757797756977, |
| "learning_rate": 4.69577867626749e-06, |
| "loss": 0.1911, |
| "step": 8980 |
| }, |
| { |
| "epoch": 2.8403759576652714, |
| "grad_norm": 0.20276757417965852, |
| "learning_rate": 4.684770535665917e-06, |
| "loss": 0.1894, |
| "step": 8990 |
| }, |
| { |
| "epoch": 2.843535265776795, |
| "grad_norm": 0.2052738035290844, |
| "learning_rate": 4.673763928919712e-06, |
| "loss": 0.1904, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.8466945738883185, |
| "grad_norm": 0.1997948903035985, |
| "learning_rate": 4.662758909585233e-06, |
| "loss": 0.1902, |
| "step": 9010 |
| }, |
| { |
| "epoch": 2.849853881999842, |
| "grad_norm": 0.21607370638898796, |
| "learning_rate": 4.651755531211121e-06, |
| "loss": 0.1885, |
| "step": 9020 |
| }, |
| { |
| "epoch": 2.8530131901113656, |
| "grad_norm": 0.2072331158936079, |
| "learning_rate": 4.640753847338022e-06, |
| "loss": 0.1903, |
| "step": 9030 |
| }, |
| { |
| "epoch": 2.856172498222889, |
| "grad_norm": 0.2095788996901622, |
| "learning_rate": 4.629753911498348e-06, |
| "loss": 0.1906, |
| "step": 9040 |
| }, |
| { |
| "epoch": 2.8593318063344126, |
| "grad_norm": 0.21107554604389273, |
| "learning_rate": 4.618755777215998e-06, |
| "loss": 0.1875, |
| "step": 9050 |
| }, |
| { |
| "epoch": 2.862491114445936, |
| "grad_norm": 0.20742194888322327, |
| "learning_rate": 4.607759498006105e-06, |
| "loss": 0.1899, |
| "step": 9060 |
| }, |
| { |
| "epoch": 2.8656504225574597, |
| "grad_norm": 0.1998899750485672, |
| "learning_rate": 4.596765127374781e-06, |
| "loss": 0.1887, |
| "step": 9070 |
| }, |
| { |
| "epoch": 2.8688097306689837, |
| "grad_norm": 0.21434577458006387, |
| "learning_rate": 4.5857727188188426e-06, |
| "loss": 0.19, |
| "step": 9080 |
| }, |
| { |
| "epoch": 2.871969038780507, |
| "grad_norm": 0.20486890206169453, |
| "learning_rate": 4.57478232582557e-06, |
| "loss": 0.1888, |
| "step": 9090 |
| }, |
| { |
| "epoch": 2.875128346892031, |
| "grad_norm": 0.19626003974934564, |
| "learning_rate": 4.563794001872428e-06, |
| "loss": 0.189, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.8782876550035543, |
| "grad_norm": 0.19598418482256752, |
| "learning_rate": 4.5528078004268125e-06, |
| "loss": 0.1908, |
| "step": 9110 |
| }, |
| { |
| "epoch": 2.881446963115078, |
| "grad_norm": 0.20771737890224903, |
| "learning_rate": 4.5418237749458e-06, |
| "loss": 0.1893, |
| "step": 9120 |
| }, |
| { |
| "epoch": 2.8846062712266014, |
| "grad_norm": 0.21369972050863945, |
| "learning_rate": 4.5308419788758705e-06, |
| "loss": 0.1914, |
| "step": 9130 |
| }, |
| { |
| "epoch": 2.887765579338125, |
| "grad_norm": 0.19830047599777365, |
| "learning_rate": 4.519862465652664e-06, |
| "loss": 0.1891, |
| "step": 9140 |
| }, |
| { |
| "epoch": 2.8909248874496485, |
| "grad_norm": 0.21728853363365294, |
| "learning_rate": 4.508885288700706e-06, |
| "loss": 0.1878, |
| "step": 9150 |
| }, |
| { |
| "epoch": 2.894084195561172, |
| "grad_norm": 0.21489567981218682, |
| "learning_rate": 4.497910501433153e-06, |
| "loss": 0.1892, |
| "step": 9160 |
| }, |
| { |
| "epoch": 2.8972435036726956, |
| "grad_norm": 0.1999440134687983, |
| "learning_rate": 4.486938157251544e-06, |
| "loss": 0.1913, |
| "step": 9170 |
| }, |
| { |
| "epoch": 2.900402811784219, |
| "grad_norm": 0.21121245524106846, |
| "learning_rate": 4.475968309545519e-06, |
| "loss": 0.192, |
| "step": 9180 |
| }, |
| { |
| "epoch": 2.903562119895743, |
| "grad_norm": 0.19232313304792983, |
| "learning_rate": 4.465001011692575e-06, |
| "loss": 0.1884, |
| "step": 9190 |
| }, |
| { |
| "epoch": 2.906721428007266, |
| "grad_norm": 0.2047713363163475, |
| "learning_rate": 4.454036317057804e-06, |
| "loss": 0.1897, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.90988073611879, |
| "grad_norm": 0.20838735166818767, |
| "learning_rate": 4.443074278993625e-06, |
| "loss": 0.1868, |
| "step": 9210 |
| }, |
| { |
| "epoch": 2.9130400442303137, |
| "grad_norm": 0.19377646969116016, |
| "learning_rate": 4.43211495083954e-06, |
| "loss": 0.1899, |
| "step": 9220 |
| }, |
| { |
| "epoch": 2.9161993523418372, |
| "grad_norm": 0.2081826810286359, |
| "learning_rate": 4.421158385921856e-06, |
| "loss": 0.1901, |
| "step": 9230 |
| }, |
| { |
| "epoch": 2.919358660453361, |
| "grad_norm": 0.19836105918450578, |
| "learning_rate": 4.410204637553437e-06, |
| "loss": 0.1897, |
| "step": 9240 |
| }, |
| { |
| "epoch": 2.9225179685648843, |
| "grad_norm": 0.20794480096225132, |
| "learning_rate": 4.3992537590334485e-06, |
| "loss": 0.1904, |
| "step": 9250 |
| }, |
| { |
| "epoch": 2.925677276676408, |
| "grad_norm": 0.21149092375760936, |
| "learning_rate": 4.38830580364708e-06, |
| "loss": 0.1897, |
| "step": 9260 |
| }, |
| { |
| "epoch": 2.9288365847879314, |
| "grad_norm": 0.20074752053463094, |
| "learning_rate": 4.377360824665309e-06, |
| "loss": 0.1876, |
| "step": 9270 |
| }, |
| { |
| "epoch": 2.931995892899455, |
| "grad_norm": 0.20279658516073718, |
| "learning_rate": 4.366418875344624e-06, |
| "loss": 0.1888, |
| "step": 9280 |
| }, |
| { |
| "epoch": 2.9351552010109785, |
| "grad_norm": 0.202921993136915, |
| "learning_rate": 4.3554800089267705e-06, |
| "loss": 0.192, |
| "step": 9290 |
| }, |
| { |
| "epoch": 2.938314509122502, |
| "grad_norm": 0.2112414397403086, |
| "learning_rate": 4.344544278638499e-06, |
| "loss": 0.1883, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.9414738172340256, |
| "grad_norm": 0.1979648780466572, |
| "learning_rate": 4.333611737691296e-06, |
| "loss": 0.188, |
| "step": 9310 |
| }, |
| { |
| "epoch": 2.9446331253455496, |
| "grad_norm": 0.19936914995735966, |
| "learning_rate": 4.322682439281126e-06, |
| "loss": 0.1876, |
| "step": 9320 |
| }, |
| { |
| "epoch": 2.9477924334570726, |
| "grad_norm": 0.2045530468116589, |
| "learning_rate": 4.311756436588185e-06, |
| "loss": 0.1861, |
| "step": 9330 |
| }, |
| { |
| "epoch": 2.9509517415685966, |
| "grad_norm": 0.2207404912444104, |
| "learning_rate": 4.300833782776624e-06, |
| "loss": 0.1894, |
| "step": 9340 |
| }, |
| { |
| "epoch": 2.95411104968012, |
| "grad_norm": 0.19298829375532547, |
| "learning_rate": 4.289914530994303e-06, |
| "loss": 0.1885, |
| "step": 9350 |
| }, |
| { |
| "epoch": 2.9572703577916437, |
| "grad_norm": 0.20201449973484348, |
| "learning_rate": 4.27899873437253e-06, |
| "loss": 0.1892, |
| "step": 9360 |
| }, |
| { |
| "epoch": 2.9604296659031673, |
| "grad_norm": 0.2038957223369961, |
| "learning_rate": 4.268086446025793e-06, |
| "loss": 0.1884, |
| "step": 9370 |
| }, |
| { |
| "epoch": 2.963588974014691, |
| "grad_norm": 0.21578914203411362, |
| "learning_rate": 4.25717771905152e-06, |
| "loss": 0.1892, |
| "step": 9380 |
| }, |
| { |
| "epoch": 2.9667482821262143, |
| "grad_norm": 0.21096600784690153, |
| "learning_rate": 4.2462726065298e-06, |
| "loss": 0.1902, |
| "step": 9390 |
| }, |
| { |
| "epoch": 2.969907590237738, |
| "grad_norm": 0.2033196759446758, |
| "learning_rate": 4.235371161523141e-06, |
| "loss": 0.1892, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.9730668983492614, |
| "grad_norm": 0.20356432378382075, |
| "learning_rate": 4.224473437076204e-06, |
| "loss": 0.1905, |
| "step": 9410 |
| }, |
| { |
| "epoch": 2.976226206460785, |
| "grad_norm": 0.21038412746729923, |
| "learning_rate": 4.2135794862155454e-06, |
| "loss": 0.1912, |
| "step": 9420 |
| }, |
| { |
| "epoch": 2.979385514572309, |
| "grad_norm": 0.2033988073630781, |
| "learning_rate": 4.20268936194936e-06, |
| "loss": 0.1897, |
| "step": 9430 |
| }, |
| { |
| "epoch": 2.982544822683832, |
| "grad_norm": 0.20236599162357977, |
| "learning_rate": 4.191803117267223e-06, |
| "loss": 0.1893, |
| "step": 9440 |
| }, |
| { |
| "epoch": 2.985704130795356, |
| "grad_norm": 0.20641674023136433, |
| "learning_rate": 4.180920805139835e-06, |
| "loss": 0.1888, |
| "step": 9450 |
| }, |
| { |
| "epoch": 2.9888634389068796, |
| "grad_norm": 0.22914246659818197, |
| "learning_rate": 4.170042478518759e-06, |
| "loss": 0.1875, |
| "step": 9460 |
| }, |
| { |
| "epoch": 2.992022747018403, |
| "grad_norm": 0.20738233476923548, |
| "learning_rate": 4.159168190336162e-06, |
| "loss": 0.187, |
| "step": 9470 |
| }, |
| { |
| "epoch": 2.9951820551299266, |
| "grad_norm": 0.20804494541317625, |
| "learning_rate": 4.148297993504566e-06, |
| "loss": 0.1902, |
| "step": 9480 |
| }, |
| { |
| "epoch": 2.99834136324145, |
| "grad_norm": 0.19974693835491714, |
| "learning_rate": 4.137431940916584e-06, |
| "loss": 0.1866, |
| "step": 9490 |
| }, |
| { |
| "epoch": 3.001579654055762, |
| "grad_norm": 0.19231070887741758, |
| "learning_rate": 4.12657008544466e-06, |
| "loss": 0.1846, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.0047389621672855, |
| "grad_norm": 0.20508127207154292, |
| "learning_rate": 4.115712479940821e-06, |
| "loss": 0.1717, |
| "step": 9510 |
| }, |
| { |
| "epoch": 3.007898270278809, |
| "grad_norm": 0.19947206755336946, |
| "learning_rate": 4.10485917723641e-06, |
| "loss": 0.1711, |
| "step": 9520 |
| }, |
| { |
| "epoch": 3.0110575783903326, |
| "grad_norm": 0.19753840539867382, |
| "learning_rate": 4.0940102301418375e-06, |
| "loss": 0.1721, |
| "step": 9530 |
| }, |
| { |
| "epoch": 3.014216886501856, |
| "grad_norm": 0.1992468045730409, |
| "learning_rate": 4.083165691446314e-06, |
| "loss": 0.1719, |
| "step": 9540 |
| }, |
| { |
| "epoch": 3.0173761946133797, |
| "grad_norm": 0.20255881958221392, |
| "learning_rate": 4.072325613917605e-06, |
| "loss": 0.1719, |
| "step": 9550 |
| }, |
| { |
| "epoch": 3.0205355027249032, |
| "grad_norm": 0.1899039859129075, |
| "learning_rate": 4.061490050301767e-06, |
| "loss": 0.1699, |
| "step": 9560 |
| }, |
| { |
| "epoch": 3.0236948108364268, |
| "grad_norm": 0.20612020049628732, |
| "learning_rate": 4.050659053322892e-06, |
| "loss": 0.1714, |
| "step": 9570 |
| }, |
| { |
| "epoch": 3.0268541189479503, |
| "grad_norm": 0.21789263057401254, |
| "learning_rate": 4.039832675682854e-06, |
| "loss": 0.1723, |
| "step": 9580 |
| }, |
| { |
| "epoch": 3.030013427059474, |
| "grad_norm": 0.2026996020270893, |
| "learning_rate": 4.0290109700610445e-06, |
| "loss": 0.17, |
| "step": 9590 |
| }, |
| { |
| "epoch": 3.0331727351709974, |
| "grad_norm": 0.19422527184239666, |
| "learning_rate": 4.0181939891141276e-06, |
| "loss": 0.1715, |
| "step": 9600 |
| }, |
| { |
| "epoch": 3.036332043282521, |
| "grad_norm": 0.19811136542886754, |
| "learning_rate": 4.007381785475776e-06, |
| "loss": 0.1707, |
| "step": 9610 |
| }, |
| { |
| "epoch": 3.039491351394045, |
| "grad_norm": 0.21473702063694924, |
| "learning_rate": 3.996574411756412e-06, |
| "loss": 0.1717, |
| "step": 9620 |
| }, |
| { |
| "epoch": 3.0426506595055685, |
| "grad_norm": 0.20093271189509854, |
| "learning_rate": 3.9857719205429666e-06, |
| "loss": 0.1698, |
| "step": 9630 |
| }, |
| { |
| "epoch": 3.045809967617092, |
| "grad_norm": 0.2054432390601117, |
| "learning_rate": 3.974974364398604e-06, |
| "loss": 0.1722, |
| "step": 9640 |
| }, |
| { |
| "epoch": 3.0489692757286155, |
| "grad_norm": 0.21106353915988532, |
| "learning_rate": 3.964181795862476e-06, |
| "loss": 0.1702, |
| "step": 9650 |
| }, |
| { |
| "epoch": 3.052128583840139, |
| "grad_norm": 0.20956851733321105, |
| "learning_rate": 3.9533942674494736e-06, |
| "loss": 0.1712, |
| "step": 9660 |
| }, |
| { |
| "epoch": 3.0552878919516626, |
| "grad_norm": 0.2151757551569205, |
| "learning_rate": 3.942611831649953e-06, |
| "loss": 0.1723, |
| "step": 9670 |
| }, |
| { |
| "epoch": 3.058447200063186, |
| "grad_norm": 0.20087929271216232, |
| "learning_rate": 3.931834540929498e-06, |
| "loss": 0.1729, |
| "step": 9680 |
| }, |
| { |
| "epoch": 3.0616065081747097, |
| "grad_norm": 0.19812955505838692, |
| "learning_rate": 3.9210624477286545e-06, |
| "loss": 0.1702, |
| "step": 9690 |
| }, |
| { |
| "epoch": 3.0647658162862332, |
| "grad_norm": 0.19934250074039184, |
| "learning_rate": 3.910295604462675e-06, |
| "loss": 0.1718, |
| "step": 9700 |
| }, |
| { |
| "epoch": 3.0679251243977568, |
| "grad_norm": 0.22292842551894693, |
| "learning_rate": 3.899534063521274e-06, |
| "loss": 0.1703, |
| "step": 9710 |
| }, |
| { |
| "epoch": 3.0710844325092803, |
| "grad_norm": 0.20621823300766834, |
| "learning_rate": 3.888777877268361e-06, |
| "loss": 0.1718, |
| "step": 9720 |
| }, |
| { |
| "epoch": 3.074243740620804, |
| "grad_norm": 0.20627308571255917, |
| "learning_rate": 3.8780270980417865e-06, |
| "loss": 0.1715, |
| "step": 9730 |
| }, |
| { |
| "epoch": 3.077403048732328, |
| "grad_norm": 0.20092589221833093, |
| "learning_rate": 3.867281778153103e-06, |
| "loss": 0.1708, |
| "step": 9740 |
| }, |
| { |
| "epoch": 3.0805623568438514, |
| "grad_norm": 0.20178715984754905, |
| "learning_rate": 3.856541969887284e-06, |
| "loss": 0.1713, |
| "step": 9750 |
| }, |
| { |
| "epoch": 3.083721664955375, |
| "grad_norm": 0.20333774590909526, |
| "learning_rate": 3.8458077255024985e-06, |
| "loss": 0.1711, |
| "step": 9760 |
| }, |
| { |
| "epoch": 3.0868809730668985, |
| "grad_norm": 0.19812370974809254, |
| "learning_rate": 3.835079097229834e-06, |
| "loss": 0.1716, |
| "step": 9770 |
| }, |
| { |
| "epoch": 3.090040281178422, |
| "grad_norm": 0.19168564591899634, |
| "learning_rate": 3.82435613727305e-06, |
| "loss": 0.1712, |
| "step": 9780 |
| }, |
| { |
| "epoch": 3.0931995892899455, |
| "grad_norm": 0.20301128877899655, |
| "learning_rate": 3.8136388978083318e-06, |
| "loss": 0.1717, |
| "step": 9790 |
| }, |
| { |
| "epoch": 3.096358897401469, |
| "grad_norm": 0.21119806157032167, |
| "learning_rate": 3.802927430984024e-06, |
| "loss": 0.1713, |
| "step": 9800 |
| }, |
| { |
| "epoch": 3.0995182055129926, |
| "grad_norm": 0.19958619654256107, |
| "learning_rate": 3.7922217889203815e-06, |
| "loss": 0.1729, |
| "step": 9810 |
| }, |
| { |
| "epoch": 3.102677513624516, |
| "grad_norm": 0.1968913471359473, |
| "learning_rate": 3.781522023709325e-06, |
| "loss": 0.172, |
| "step": 9820 |
| }, |
| { |
| "epoch": 3.1058368217360397, |
| "grad_norm": 0.20650603439149065, |
| "learning_rate": 3.770828187414169e-06, |
| "loss": 0.1714, |
| "step": 9830 |
| }, |
| { |
| "epoch": 3.1089961298475632, |
| "grad_norm": 0.20480879307691913, |
| "learning_rate": 3.7601403320693877e-06, |
| "loss": 0.1731, |
| "step": 9840 |
| }, |
| { |
| "epoch": 3.112155437959087, |
| "grad_norm": 0.19841361175349975, |
| "learning_rate": 3.7494585096803475e-06, |
| "loss": 0.17, |
| "step": 9850 |
| }, |
| { |
| "epoch": 3.1153147460706103, |
| "grad_norm": 0.20023910284222765, |
| "learning_rate": 3.7387827722230592e-06, |
| "loss": 0.1719, |
| "step": 9860 |
| }, |
| { |
| "epoch": 3.1184740541821343, |
| "grad_norm": 0.20538502307132153, |
| "learning_rate": 3.72811317164393e-06, |
| "loss": 0.1714, |
| "step": 9870 |
| }, |
| { |
| "epoch": 3.121633362293658, |
| "grad_norm": 0.2025961874013164, |
| "learning_rate": 3.7174497598595004e-06, |
| "loss": 0.1731, |
| "step": 9880 |
| }, |
| { |
| "epoch": 3.1247926704051814, |
| "grad_norm": 0.19897689525350173, |
| "learning_rate": 3.7067925887562035e-06, |
| "loss": 0.1709, |
| "step": 9890 |
| }, |
| { |
| "epoch": 3.127951978516705, |
| "grad_norm": 0.19674997421298326, |
| "learning_rate": 3.6961417101901004e-06, |
| "loss": 0.1709, |
| "step": 9900 |
| }, |
| { |
| "epoch": 3.1311112866282285, |
| "grad_norm": 0.20048513567462214, |
| "learning_rate": 3.6854971759866343e-06, |
| "loss": 0.168, |
| "step": 9910 |
| }, |
| { |
| "epoch": 3.134270594739752, |
| "grad_norm": 0.20877865427323217, |
| "learning_rate": 3.6748590379403837e-06, |
| "loss": 0.1699, |
| "step": 9920 |
| }, |
| { |
| "epoch": 3.1374299028512755, |
| "grad_norm": 0.20947180408342017, |
| "learning_rate": 3.664227347814796e-06, |
| "loss": 0.1718, |
| "step": 9930 |
| }, |
| { |
| "epoch": 3.140589210962799, |
| "grad_norm": 0.19984590881674016, |
| "learning_rate": 3.653602157341953e-06, |
| "loss": 0.1744, |
| "step": 9940 |
| }, |
| { |
| "epoch": 3.1437485190743226, |
| "grad_norm": 0.20244914750147294, |
| "learning_rate": 3.6429835182223028e-06, |
| "loss": 0.1701, |
| "step": 9950 |
| }, |
| { |
| "epoch": 3.146907827185846, |
| "grad_norm": 0.20651719839215915, |
| "learning_rate": 3.632371482124416e-06, |
| "loss": 0.1722, |
| "step": 9960 |
| }, |
| { |
| "epoch": 3.1500671352973697, |
| "grad_norm": 0.21320130534174414, |
| "learning_rate": 3.621766100684742e-06, |
| "loss": 0.1719, |
| "step": 9970 |
| }, |
| { |
| "epoch": 3.1532264434088937, |
| "grad_norm": 0.1982018761614747, |
| "learning_rate": 3.6111674255073415e-06, |
| "loss": 0.1697, |
| "step": 9980 |
| }, |
| { |
| "epoch": 3.1563857515204172, |
| "grad_norm": 0.19540192155635913, |
| "learning_rate": 3.600575508163643e-06, |
| "loss": 0.1716, |
| "step": 9990 |
| }, |
| { |
| "epoch": 3.1595450596319408, |
| "grad_norm": 0.19265346677541417, |
| "learning_rate": 3.5899904001922014e-06, |
| "loss": 0.1723, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.1627043677434643, |
| "grad_norm": 0.2602993365623357, |
| "learning_rate": 3.579412153098428e-06, |
| "loss": 0.1717, |
| "step": 10010 |
| }, |
| { |
| "epoch": 3.165863675854988, |
| "grad_norm": 0.2072576388794693, |
| "learning_rate": 3.568840818354359e-06, |
| "loss": 0.1705, |
| "step": 10020 |
| }, |
| { |
| "epoch": 3.1690229839665114, |
| "grad_norm": 0.20292312081490704, |
| "learning_rate": 3.5582764473983898e-06, |
| "loss": 0.1708, |
| "step": 10030 |
| }, |
| { |
| "epoch": 3.172182292078035, |
| "grad_norm": 0.19839564056735176, |
| "learning_rate": 3.5477190916350314e-06, |
| "loss": 0.173, |
| "step": 10040 |
| }, |
| { |
| "epoch": 3.1753416001895585, |
| "grad_norm": 0.21059840553848913, |
| "learning_rate": 3.5371688024346663e-06, |
| "loss": 0.1728, |
| "step": 10050 |
| }, |
| { |
| "epoch": 3.178500908301082, |
| "grad_norm": 0.19127556281230237, |
| "learning_rate": 3.5266256311332838e-06, |
| "loss": 0.1717, |
| "step": 10060 |
| }, |
| { |
| "epoch": 3.1816602164126055, |
| "grad_norm": 0.20280055781097764, |
| "learning_rate": 3.5160896290322466e-06, |
| "loss": 0.1718, |
| "step": 10070 |
| }, |
| { |
| "epoch": 3.184819524524129, |
| "grad_norm": 0.19870253928511597, |
| "learning_rate": 3.5055608473980275e-06, |
| "loss": 0.173, |
| "step": 10080 |
| }, |
| { |
| "epoch": 3.1879788326356526, |
| "grad_norm": 0.20124551003316218, |
| "learning_rate": 3.495039337461966e-06, |
| "loss": 0.1714, |
| "step": 10090 |
| }, |
| { |
| "epoch": 3.191138140747176, |
| "grad_norm": 0.1974863499022992, |
| "learning_rate": 3.484525150420024e-06, |
| "loss": 0.1727, |
| "step": 10100 |
| }, |
| { |
| "epoch": 3.1942974488587, |
| "grad_norm": 0.2056406098760328, |
| "learning_rate": 3.474018337432526e-06, |
| "loss": 0.1711, |
| "step": 10110 |
| }, |
| { |
| "epoch": 3.1974567569702237, |
| "grad_norm": 0.1993829016013529, |
| "learning_rate": 3.4635189496239147e-06, |
| "loss": 0.1723, |
| "step": 10120 |
| }, |
| { |
| "epoch": 3.2006160650817472, |
| "grad_norm": 0.20235980644501536, |
| "learning_rate": 3.4530270380825106e-06, |
| "loss": 0.1719, |
| "step": 10130 |
| }, |
| { |
| "epoch": 3.2037753731932708, |
| "grad_norm": 0.21045502485536316, |
| "learning_rate": 3.442542653860246e-06, |
| "loss": 0.1728, |
| "step": 10140 |
| }, |
| { |
| "epoch": 3.2069346813047943, |
| "grad_norm": 0.1983591302141842, |
| "learning_rate": 3.4320658479724358e-06, |
| "loss": 0.1714, |
| "step": 10150 |
| }, |
| { |
| "epoch": 3.210093989416318, |
| "grad_norm": 0.20220358320589318, |
| "learning_rate": 3.4215966713975137e-06, |
| "loss": 0.1721, |
| "step": 10160 |
| }, |
| { |
| "epoch": 3.2132532975278414, |
| "grad_norm": 0.20148720905716092, |
| "learning_rate": 3.41113517507679e-06, |
| "loss": 0.1722, |
| "step": 10170 |
| }, |
| { |
| "epoch": 3.216412605639365, |
| "grad_norm": 0.21958364967195199, |
| "learning_rate": 3.400681409914211e-06, |
| "loss": 0.1717, |
| "step": 10180 |
| }, |
| { |
| "epoch": 3.2195719137508885, |
| "grad_norm": 0.20443876928391433, |
| "learning_rate": 3.390235426776095e-06, |
| "loss": 0.1723, |
| "step": 10190 |
| }, |
| { |
| "epoch": 3.222731221862412, |
| "grad_norm": 0.20468900776748283, |
| "learning_rate": 3.3797972764909044e-06, |
| "loss": 0.1728, |
| "step": 10200 |
| }, |
| { |
| "epoch": 3.2258905299739355, |
| "grad_norm": 0.20712330871995108, |
| "learning_rate": 3.3693670098489794e-06, |
| "loss": 0.1717, |
| "step": 10210 |
| }, |
| { |
| "epoch": 3.229049838085459, |
| "grad_norm": 0.20539340759709276, |
| "learning_rate": 3.3589446776023026e-06, |
| "loss": 0.1735, |
| "step": 10220 |
| }, |
| { |
| "epoch": 3.2322091461969826, |
| "grad_norm": 0.20853940556733697, |
| "learning_rate": 3.3485303304642523e-06, |
| "loss": 0.1734, |
| "step": 10230 |
| }, |
| { |
| "epoch": 3.2353684543085066, |
| "grad_norm": 0.20622108613855025, |
| "learning_rate": 3.338124019109348e-06, |
| "loss": 0.1731, |
| "step": 10240 |
| }, |
| { |
| "epoch": 3.23852776242003, |
| "grad_norm": 0.20725725534935868, |
| "learning_rate": 3.3277257941730112e-06, |
| "loss": 0.1701, |
| "step": 10250 |
| }, |
| { |
| "epoch": 3.2416870705315537, |
| "grad_norm": 0.20483333136083265, |
| "learning_rate": 3.3173357062513156e-06, |
| "loss": 0.1726, |
| "step": 10260 |
| }, |
| { |
| "epoch": 3.2448463786430772, |
| "grad_norm": 0.19730208138726862, |
| "learning_rate": 3.30695380590074e-06, |
| "loss": 0.1719, |
| "step": 10270 |
| }, |
| { |
| "epoch": 3.2480056867546008, |
| "grad_norm": 0.19161529058623247, |
| "learning_rate": 3.2965801436379268e-06, |
| "loss": 0.1703, |
| "step": 10280 |
| }, |
| { |
| "epoch": 3.2511649948661243, |
| "grad_norm": 0.20183756272218625, |
| "learning_rate": 3.2862147699394308e-06, |
| "loss": 0.1707, |
| "step": 10290 |
| }, |
| { |
| "epoch": 3.254324302977648, |
| "grad_norm": 0.2077259975279863, |
| "learning_rate": 3.2758577352414746e-06, |
| "loss": 0.1724, |
| "step": 10300 |
| }, |
| { |
| "epoch": 3.2574836110891714, |
| "grad_norm": 0.19909554676713112, |
| "learning_rate": 3.2655090899397104e-06, |
| "loss": 0.1727, |
| "step": 10310 |
| }, |
| { |
| "epoch": 3.260642919200695, |
| "grad_norm": 0.19458384557088879, |
| "learning_rate": 3.255168884388962e-06, |
| "loss": 0.1706, |
| "step": 10320 |
| }, |
| { |
| "epoch": 3.2638022273122185, |
| "grad_norm": 0.20300444839784934, |
| "learning_rate": 3.2448371689029917e-06, |
| "loss": 0.17, |
| "step": 10330 |
| }, |
| { |
| "epoch": 3.266961535423742, |
| "grad_norm": 0.2037381038102816, |
| "learning_rate": 3.2345139937542493e-06, |
| "loss": 0.1707, |
| "step": 10340 |
| }, |
| { |
| "epoch": 3.270120843535266, |
| "grad_norm": 0.20261293016006007, |
| "learning_rate": 3.2241994091736264e-06, |
| "loss": 0.1716, |
| "step": 10350 |
| }, |
| { |
| "epoch": 3.2732801516467895, |
| "grad_norm": 0.20445117127878168, |
| "learning_rate": 3.2138934653502157e-06, |
| "loss": 0.1715, |
| "step": 10360 |
| }, |
| { |
| "epoch": 3.276439459758313, |
| "grad_norm": 0.20006168658634946, |
| "learning_rate": 3.2035962124310677e-06, |
| "loss": 0.1699, |
| "step": 10370 |
| }, |
| { |
| "epoch": 3.2795987678698366, |
| "grad_norm": 0.2048357399994623, |
| "learning_rate": 3.1933077005209413e-06, |
| "loss": 0.1714, |
| "step": 10380 |
| }, |
| { |
| "epoch": 3.28275807598136, |
| "grad_norm": 0.20764582211703872, |
| "learning_rate": 3.1830279796820655e-06, |
| "loss": 0.1726, |
| "step": 10390 |
| }, |
| { |
| "epoch": 3.2859173840928837, |
| "grad_norm": 0.19507013108646712, |
| "learning_rate": 3.17275709993389e-06, |
| "loss": 0.1686, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.2890766922044072, |
| "grad_norm": 0.20376140005700832, |
| "learning_rate": 3.1624951112528486e-06, |
| "loss": 0.1727, |
| "step": 10410 |
| }, |
| { |
| "epoch": 3.2922360003159308, |
| "grad_norm": 0.19814895837042856, |
| "learning_rate": 3.152242063572111e-06, |
| "loss": 0.172, |
| "step": 10420 |
| }, |
| { |
| "epoch": 3.2953953084274543, |
| "grad_norm": 0.20809655011071984, |
| "learning_rate": 3.1419980067813416e-06, |
| "loss": 0.1723, |
| "step": 10430 |
| }, |
| { |
| "epoch": 3.298554616538978, |
| "grad_norm": 0.2130288394869752, |
| "learning_rate": 3.131762990726457e-06, |
| "loss": 0.1693, |
| "step": 10440 |
| }, |
| { |
| "epoch": 3.3017139246505014, |
| "grad_norm": 0.20474107277059672, |
| "learning_rate": 3.1215370652093817e-06, |
| "loss": 0.1728, |
| "step": 10450 |
| }, |
| { |
| "epoch": 3.304873232762025, |
| "grad_norm": 0.19685492816170327, |
| "learning_rate": 3.1113202799878104e-06, |
| "loss": 0.1736, |
| "step": 10460 |
| }, |
| { |
| "epoch": 3.3080325408735485, |
| "grad_norm": 0.20719245855817442, |
| "learning_rate": 3.1011126847749573e-06, |
| "loss": 0.1718, |
| "step": 10470 |
| }, |
| { |
| "epoch": 3.3111918489850725, |
| "grad_norm": 0.19594664304604262, |
| "learning_rate": 3.090914329239325e-06, |
| "loss": 0.1705, |
| "step": 10480 |
| }, |
| { |
| "epoch": 3.314351157096596, |
| "grad_norm": 0.19759328866966638, |
| "learning_rate": 3.0807252630044535e-06, |
| "loss": 0.1738, |
| "step": 10490 |
| }, |
| { |
| "epoch": 3.3175104652081195, |
| "grad_norm": 0.2039023386475025, |
| "learning_rate": 3.0705455356486847e-06, |
| "loss": 0.1709, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.320669773319643, |
| "grad_norm": 0.20102237377028126, |
| "learning_rate": 3.0603751967049196e-06, |
| "loss": 0.1731, |
| "step": 10510 |
| }, |
| { |
| "epoch": 3.3238290814311666, |
| "grad_norm": 0.20154946206713367, |
| "learning_rate": 3.050214295660373e-06, |
| "loss": 0.1744, |
| "step": 10520 |
| }, |
| { |
| "epoch": 3.32698838954269, |
| "grad_norm": 0.20754788610456004, |
| "learning_rate": 3.0400628819563394e-06, |
| "loss": 0.1725, |
| "step": 10530 |
| }, |
| { |
| "epoch": 3.3301476976542137, |
| "grad_norm": 0.21440535676787434, |
| "learning_rate": 3.02992100498795e-06, |
| "loss": 0.1711, |
| "step": 10540 |
| }, |
| { |
| "epoch": 3.3333070057657372, |
| "grad_norm": 0.1985749560929464, |
| "learning_rate": 3.0197887141039295e-06, |
| "loss": 0.1716, |
| "step": 10550 |
| }, |
| { |
| "epoch": 3.336466313877261, |
| "grad_norm": 0.20486416168285063, |
| "learning_rate": 3.009666058606361e-06, |
| "loss": 0.1712, |
| "step": 10560 |
| }, |
| { |
| "epoch": 3.3396256219887843, |
| "grad_norm": 0.20464416545173006, |
| "learning_rate": 2.999553087750441e-06, |
| "loss": 0.1715, |
| "step": 10570 |
| }, |
| { |
| "epoch": 3.342784930100308, |
| "grad_norm": 0.19277745618987774, |
| "learning_rate": 2.9894498507442403e-06, |
| "loss": 0.1696, |
| "step": 10580 |
| }, |
| { |
| "epoch": 3.345944238211832, |
| "grad_norm": 0.19243002023701336, |
| "learning_rate": 2.979356396748474e-06, |
| "loss": 0.1722, |
| "step": 10590 |
| }, |
| { |
| "epoch": 3.349103546323355, |
| "grad_norm": 0.184939999971442, |
| "learning_rate": 2.969272774876246e-06, |
| "loss": 0.1704, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.352262854434879, |
| "grad_norm": 0.20963451528121682, |
| "learning_rate": 2.9591990341928233e-06, |
| "loss": 0.172, |
| "step": 10610 |
| }, |
| { |
| "epoch": 3.3554221625464025, |
| "grad_norm": 0.21203412019436482, |
| "learning_rate": 2.9491352237153924e-06, |
| "loss": 0.1719, |
| "step": 10620 |
| }, |
| { |
| "epoch": 3.358581470657926, |
| "grad_norm": 0.19976350882936353, |
| "learning_rate": 2.9390813924128187e-06, |
| "loss": 0.1716, |
| "step": 10630 |
| }, |
| { |
| "epoch": 3.3617407787694495, |
| "grad_norm": 0.19765127820077447, |
| "learning_rate": 2.9290375892054145e-06, |
| "loss": 0.1719, |
| "step": 10640 |
| }, |
| { |
| "epoch": 3.364900086880973, |
| "grad_norm": 0.19819054962322868, |
| "learning_rate": 2.9190038629646928e-06, |
| "loss": 0.1718, |
| "step": 10650 |
| }, |
| { |
| "epoch": 3.3680593949924966, |
| "grad_norm": 0.2018843941060178, |
| "learning_rate": 2.9089802625131357e-06, |
| "loss": 0.1715, |
| "step": 10660 |
| }, |
| { |
| "epoch": 3.37121870310402, |
| "grad_norm": 0.2053114073761089, |
| "learning_rate": 2.898966836623956e-06, |
| "loss": 0.1712, |
| "step": 10670 |
| }, |
| { |
| "epoch": 3.3743780112155437, |
| "grad_norm": 0.1834559540518353, |
| "learning_rate": 2.888963634020856e-06, |
| "loss": 0.1718, |
| "step": 10680 |
| }, |
| { |
| "epoch": 3.3775373193270672, |
| "grad_norm": 0.21217338223459672, |
| "learning_rate": 2.8789707033777958e-06, |
| "loss": 0.17, |
| "step": 10690 |
| }, |
| { |
| "epoch": 3.380696627438591, |
| "grad_norm": 0.20413368942330248, |
| "learning_rate": 2.868988093318755e-06, |
| "loss": 0.17, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.3838559355501143, |
| "grad_norm": 0.20529368260563738, |
| "learning_rate": 2.8590158524174847e-06, |
| "loss": 0.1706, |
| "step": 10710 |
| }, |
| { |
| "epoch": 3.3870152436616383, |
| "grad_norm": 0.1996839471001918, |
| "learning_rate": 2.849054029197299e-06, |
| "loss": 0.1728, |
| "step": 10720 |
| }, |
| { |
| "epoch": 3.390174551773162, |
| "grad_norm": 0.21107475110462812, |
| "learning_rate": 2.8391026721308048e-06, |
| "loss": 0.1726, |
| "step": 10730 |
| }, |
| { |
| "epoch": 3.3933338598846854, |
| "grad_norm": 0.1977207222688015, |
| "learning_rate": 2.8291618296396906e-06, |
| "loss": 0.1717, |
| "step": 10740 |
| }, |
| { |
| "epoch": 3.396493167996209, |
| "grad_norm": 0.20112571153294398, |
| "learning_rate": 2.819231550094482e-06, |
| "loss": 0.171, |
| "step": 10750 |
| }, |
| { |
| "epoch": 3.3996524761077325, |
| "grad_norm": 0.1998438118160792, |
| "learning_rate": 2.8093118818143054e-06, |
| "loss": 0.1714, |
| "step": 10760 |
| }, |
| { |
| "epoch": 3.402811784219256, |
| "grad_norm": 0.20497839555948202, |
| "learning_rate": 2.799402873066657e-06, |
| "loss": 0.1718, |
| "step": 10770 |
| }, |
| { |
| "epoch": 3.4059710923307795, |
| "grad_norm": 0.20138016538674214, |
| "learning_rate": 2.789504572067163e-06, |
| "loss": 0.1723, |
| "step": 10780 |
| }, |
| { |
| "epoch": 3.409130400442303, |
| "grad_norm": 0.2119166594988366, |
| "learning_rate": 2.7796170269793448e-06, |
| "loss": 0.1714, |
| "step": 10790 |
| }, |
| { |
| "epoch": 3.4122897085538266, |
| "grad_norm": 0.207710303365895, |
| "learning_rate": 2.7697402859143973e-06, |
| "loss": 0.1731, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.41544901666535, |
| "grad_norm": 0.2002743432404299, |
| "learning_rate": 2.7598743969309323e-06, |
| "loss": 0.1705, |
| "step": 10810 |
| }, |
| { |
| "epoch": 3.4186083247768737, |
| "grad_norm": 0.1979710890338057, |
| "learning_rate": 2.7500194080347652e-06, |
| "loss": 0.1698, |
| "step": 10820 |
| }, |
| { |
| "epoch": 3.4217676328883972, |
| "grad_norm": 0.20374184595460798, |
| "learning_rate": 2.740175367178671e-06, |
| "loss": 0.1731, |
| "step": 10830 |
| }, |
| { |
| "epoch": 3.424926940999921, |
| "grad_norm": 0.21111483778852924, |
| "learning_rate": 2.7303423222621532e-06, |
| "loss": 0.1712, |
| "step": 10840 |
| }, |
| { |
| "epoch": 3.4280862491114448, |
| "grad_norm": 0.209638712778081, |
| "learning_rate": 2.7205203211312113e-06, |
| "loss": 0.1695, |
| "step": 10850 |
| }, |
| { |
| "epoch": 3.4312455572229683, |
| "grad_norm": 0.20749198974074953, |
| "learning_rate": 2.710709411578108e-06, |
| "loss": 0.1701, |
| "step": 10860 |
| }, |
| { |
| "epoch": 3.434404865334492, |
| "grad_norm": 0.20185076643899905, |
| "learning_rate": 2.700909641341136e-06, |
| "loss": 0.1716, |
| "step": 10870 |
| }, |
| { |
| "epoch": 3.4375641734460154, |
| "grad_norm": 0.20360708501600241, |
| "learning_rate": 2.6911210581043827e-06, |
| "loss": 0.1717, |
| "step": 10880 |
| }, |
| { |
| "epoch": 3.440723481557539, |
| "grad_norm": 0.19759420416489065, |
| "learning_rate": 2.6813437094975058e-06, |
| "loss": 0.1702, |
| "step": 10890 |
| }, |
| { |
| "epoch": 3.4438827896690625, |
| "grad_norm": 0.21258398444562132, |
| "learning_rate": 2.6715776430954948e-06, |
| "loss": 0.1712, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.447042097780586, |
| "grad_norm": 0.204687059081065, |
| "learning_rate": 2.661822906418443e-06, |
| "loss": 0.1713, |
| "step": 10910 |
| }, |
| { |
| "epoch": 3.4502014058921096, |
| "grad_norm": 0.21216451437708195, |
| "learning_rate": 2.652079546931314e-06, |
| "loss": 0.172, |
| "step": 10920 |
| }, |
| { |
| "epoch": 3.453360714003633, |
| "grad_norm": 0.20495187704615014, |
| "learning_rate": 2.642347612043713e-06, |
| "loss": 0.172, |
| "step": 10930 |
| }, |
| { |
| "epoch": 3.4565200221151566, |
| "grad_norm": 0.20577240721425508, |
| "learning_rate": 2.632627149109653e-06, |
| "loss": 0.1724, |
| "step": 10940 |
| }, |
| { |
| "epoch": 3.45967933022668, |
| "grad_norm": 0.20332569317373442, |
| "learning_rate": 2.622918205427332e-06, |
| "loss": 0.1728, |
| "step": 10950 |
| }, |
| { |
| "epoch": 3.462838638338204, |
| "grad_norm": 0.20833963528994082, |
| "learning_rate": 2.613220828238887e-06, |
| "loss": 0.1723, |
| "step": 10960 |
| }, |
| { |
| "epoch": 3.4659979464497277, |
| "grad_norm": 0.20062775671760197, |
| "learning_rate": 2.6035350647301825e-06, |
| "loss": 0.1697, |
| "step": 10970 |
| }, |
| { |
| "epoch": 3.4691572545612512, |
| "grad_norm": 0.1925666545023208, |
| "learning_rate": 2.5938609620305697e-06, |
| "loss": 0.1721, |
| "step": 10980 |
| }, |
| { |
| "epoch": 3.4723165626727748, |
| "grad_norm": 0.20615750012695935, |
| "learning_rate": 2.584198567212663e-06, |
| "loss": 0.1693, |
| "step": 10990 |
| }, |
| { |
| "epoch": 3.4754758707842983, |
| "grad_norm": 0.205868926105775, |
| "learning_rate": 2.5745479272921035e-06, |
| "loss": 0.1715, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.478635178895822, |
| "grad_norm": 0.19907532345392218, |
| "learning_rate": 2.5649090892273394e-06, |
| "loss": 0.1697, |
| "step": 11010 |
| }, |
| { |
| "epoch": 3.4817944870073454, |
| "grad_norm": 0.20241604647559086, |
| "learning_rate": 2.5552820999193893e-06, |
| "loss": 0.1714, |
| "step": 11020 |
| }, |
| { |
| "epoch": 3.484953795118869, |
| "grad_norm": 0.19925318676474033, |
| "learning_rate": 2.5456670062116227e-06, |
| "loss": 0.1702, |
| "step": 11030 |
| }, |
| { |
| "epoch": 3.4881131032303925, |
| "grad_norm": 0.20118707025616442, |
| "learning_rate": 2.5360638548895177e-06, |
| "loss": 0.1687, |
| "step": 11040 |
| }, |
| { |
| "epoch": 3.491272411341916, |
| "grad_norm": 0.21011772565832204, |
| "learning_rate": 2.526472692680455e-06, |
| "loss": 0.1723, |
| "step": 11050 |
| }, |
| { |
| "epoch": 3.4944317194534396, |
| "grad_norm": 0.2114462945794177, |
| "learning_rate": 2.5168935662534676e-06, |
| "loss": 0.1713, |
| "step": 11060 |
| }, |
| { |
| "epoch": 3.497591027564963, |
| "grad_norm": 0.20322439000336912, |
| "learning_rate": 2.507326522219031e-06, |
| "loss": 0.1722, |
| "step": 11070 |
| }, |
| { |
| "epoch": 3.5007503356764866, |
| "grad_norm": 0.20423924785828376, |
| "learning_rate": 2.497771607128826e-06, |
| "loss": 0.1711, |
| "step": 11080 |
| }, |
| { |
| "epoch": 3.5039096437880106, |
| "grad_norm": 0.21367822416543628, |
| "learning_rate": 2.4882288674755196e-06, |
| "loss": 0.1702, |
| "step": 11090 |
| }, |
| { |
| "epoch": 3.507068951899534, |
| "grad_norm": 0.20077382774697636, |
| "learning_rate": 2.4786983496925273e-06, |
| "loss": 0.1723, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.5102282600110577, |
| "grad_norm": 0.21321414495009436, |
| "learning_rate": 2.4691801001538083e-06, |
| "loss": 0.1696, |
| "step": 11110 |
| }, |
| { |
| "epoch": 3.5133875681225812, |
| "grad_norm": 0.203537804839117, |
| "learning_rate": 2.459674165173611e-06, |
| "loss": 0.1698, |
| "step": 11120 |
| }, |
| { |
| "epoch": 3.516546876234105, |
| "grad_norm": 0.2009040955436558, |
| "learning_rate": 2.450180591006278e-06, |
| "loss": 0.1716, |
| "step": 11130 |
| }, |
| { |
| "epoch": 3.5197061843456283, |
| "grad_norm": 0.20513705543314245, |
| "learning_rate": 2.440699423845994e-06, |
| "loss": 0.1721, |
| "step": 11140 |
| }, |
| { |
| "epoch": 3.522865492457152, |
| "grad_norm": 0.19738195470784428, |
| "learning_rate": 2.43123070982658e-06, |
| "loss": 0.1716, |
| "step": 11150 |
| }, |
| { |
| "epoch": 3.5260248005686754, |
| "grad_norm": 0.19923618992627787, |
| "learning_rate": 2.4217744950212603e-06, |
| "loss": 0.1722, |
| "step": 11160 |
| }, |
| { |
| "epoch": 3.529184108680199, |
| "grad_norm": 0.19543045451037108, |
| "learning_rate": 2.4123308254424397e-06, |
| "loss": 0.1722, |
| "step": 11170 |
| }, |
| { |
| "epoch": 3.5323434167917225, |
| "grad_norm": 0.20098623956184636, |
| "learning_rate": 2.4028997470414813e-06, |
| "loss": 0.1721, |
| "step": 11180 |
| }, |
| { |
| "epoch": 3.535502724903246, |
| "grad_norm": 0.19666520667412346, |
| "learning_rate": 2.393481305708481e-06, |
| "loss": 0.1718, |
| "step": 11190 |
| }, |
| { |
| "epoch": 3.53866203301477, |
| "grad_norm": 0.20769071659100188, |
| "learning_rate": 2.38407554727204e-06, |
| "loss": 0.1721, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.541821341126293, |
| "grad_norm": 0.20902129214374304, |
| "learning_rate": 2.3746825174990586e-06, |
| "loss": 0.1734, |
| "step": 11210 |
| }, |
| { |
| "epoch": 3.544980649237817, |
| "grad_norm": 0.18991888715497546, |
| "learning_rate": 2.365302262094485e-06, |
| "loss": 0.1718, |
| "step": 11220 |
| }, |
| { |
| "epoch": 3.5481399573493406, |
| "grad_norm": 0.18872022535120644, |
| "learning_rate": 2.3559348267011265e-06, |
| "loss": 0.1717, |
| "step": 11230 |
| }, |
| { |
| "epoch": 3.551299265460864, |
| "grad_norm": 0.2032395005721165, |
| "learning_rate": 2.3465802568993974e-06, |
| "loss": 0.1696, |
| "step": 11240 |
| }, |
| { |
| "epoch": 3.5544585735723877, |
| "grad_norm": 0.20260872935920152, |
| "learning_rate": 2.3372385982071155e-06, |
| "loss": 0.1699, |
| "step": 11250 |
| }, |
| { |
| "epoch": 3.5576178816839112, |
| "grad_norm": 0.19808829127100105, |
| "learning_rate": 2.3279098960792743e-06, |
| "loss": 0.1693, |
| "step": 11260 |
| }, |
| { |
| "epoch": 3.560777189795435, |
| "grad_norm": 0.20833962705825296, |
| "learning_rate": 2.318594195907826e-06, |
| "loss": 0.1716, |
| "step": 11270 |
| }, |
| { |
| "epoch": 3.5639364979069583, |
| "grad_norm": 0.20099933117353708, |
| "learning_rate": 2.3092915430214486e-06, |
| "loss": 0.171, |
| "step": 11280 |
| }, |
| { |
| "epoch": 3.567095806018482, |
| "grad_norm": 0.20739851880627683, |
| "learning_rate": 2.3000019826853464e-06, |
| "loss": 0.1693, |
| "step": 11290 |
| }, |
| { |
| "epoch": 3.5702551141300054, |
| "grad_norm": 0.19883060983816717, |
| "learning_rate": 2.2907255601010048e-06, |
| "loss": 0.1706, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.573414422241529, |
| "grad_norm": 0.200914416630117, |
| "learning_rate": 2.2814623204059954e-06, |
| "loss": 0.1705, |
| "step": 11310 |
| }, |
| { |
| "epoch": 3.5765737303530525, |
| "grad_norm": 0.18451322741288337, |
| "learning_rate": 2.272212308673733e-06, |
| "loss": 0.1702, |
| "step": 11320 |
| }, |
| { |
| "epoch": 3.5797330384645765, |
| "grad_norm": 0.20604109813637425, |
| "learning_rate": 2.262975569913274e-06, |
| "loss": 0.1716, |
| "step": 11330 |
| }, |
| { |
| "epoch": 3.5828923465760996, |
| "grad_norm": 0.20483250038192008, |
| "learning_rate": 2.2537521490690885e-06, |
| "loss": 0.1692, |
| "step": 11340 |
| }, |
| { |
| "epoch": 3.5860516546876235, |
| "grad_norm": 0.20171939003906209, |
| "learning_rate": 2.2445420910208444e-06, |
| "loss": 0.1687, |
| "step": 11350 |
| }, |
| { |
| "epoch": 3.589210962799147, |
| "grad_norm": 0.20410002768909777, |
| "learning_rate": 2.2353454405831878e-06, |
| "loss": 0.1681, |
| "step": 11360 |
| }, |
| { |
| "epoch": 3.5923702709106706, |
| "grad_norm": 0.1943525698679139, |
| "learning_rate": 2.2261622425055275e-06, |
| "loss": 0.1726, |
| "step": 11370 |
| }, |
| { |
| "epoch": 3.595529579022194, |
| "grad_norm": 0.19993993998805085, |
| "learning_rate": 2.2169925414718084e-06, |
| "loss": 0.1719, |
| "step": 11380 |
| }, |
| { |
| "epoch": 3.5986888871337177, |
| "grad_norm": 0.20390963358156805, |
| "learning_rate": 2.207836382100314e-06, |
| "loss": 0.1701, |
| "step": 11390 |
| }, |
| { |
| "epoch": 3.6018481952452412, |
| "grad_norm": 0.2052658002840104, |
| "learning_rate": 2.1986938089434217e-06, |
| "loss": 0.1715, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.605007503356765, |
| "grad_norm": 0.2050183493439441, |
| "learning_rate": 2.1895648664874107e-06, |
| "loss": 0.1719, |
| "step": 11410 |
| }, |
| { |
| "epoch": 3.6081668114682883, |
| "grad_norm": 0.2133941623970417, |
| "learning_rate": 2.1804495991522312e-06, |
| "loss": 0.1704, |
| "step": 11420 |
| }, |
| { |
| "epoch": 3.611326119579812, |
| "grad_norm": 0.19332656069708545, |
| "learning_rate": 2.171348051291293e-06, |
| "loss": 0.1681, |
| "step": 11430 |
| }, |
| { |
| "epoch": 3.614485427691336, |
| "grad_norm": 0.1933434601782223, |
| "learning_rate": 2.1622602671912507e-06, |
| "loss": 0.1704, |
| "step": 11440 |
| }, |
| { |
| "epoch": 3.617644735802859, |
| "grad_norm": 0.1983561300379488, |
| "learning_rate": 2.1531862910717864e-06, |
| "loss": 0.1706, |
| "step": 11450 |
| }, |
| { |
| "epoch": 3.620804043914383, |
| "grad_norm": 0.1979155158963241, |
| "learning_rate": 2.1441261670853886e-06, |
| "loss": 0.1686, |
| "step": 11460 |
| }, |
| { |
| "epoch": 3.6239633520259065, |
| "grad_norm": 0.19862020700111335, |
| "learning_rate": 2.1350799393171565e-06, |
| "loss": 0.1729, |
| "step": 11470 |
| }, |
| { |
| "epoch": 3.62712266013743, |
| "grad_norm": 0.19911281754040644, |
| "learning_rate": 2.1260476517845573e-06, |
| "loss": 0.1715, |
| "step": 11480 |
| }, |
| { |
| "epoch": 3.6302819682489536, |
| "grad_norm": 0.19710089391849986, |
| "learning_rate": 2.117029348437243e-06, |
| "loss": 0.1713, |
| "step": 11490 |
| }, |
| { |
| "epoch": 3.633441276360477, |
| "grad_norm": 0.2043341418376342, |
| "learning_rate": 2.108025073156806e-06, |
| "loss": 0.1719, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.6366005844720006, |
| "grad_norm": 0.22322690107677232, |
| "learning_rate": 2.09903486975659e-06, |
| "loss": 0.1729, |
| "step": 11510 |
| }, |
| { |
| "epoch": 3.639759892583524, |
| "grad_norm": 0.20236434961535074, |
| "learning_rate": 2.090058781981464e-06, |
| "loss": 0.1711, |
| "step": 11520 |
| }, |
| { |
| "epoch": 3.6429192006950477, |
| "grad_norm": 0.2123665454455019, |
| "learning_rate": 2.0810968535076126e-06, |
| "loss": 0.1701, |
| "step": 11530 |
| }, |
| { |
| "epoch": 3.6460785088065713, |
| "grad_norm": 0.20276458027943825, |
| "learning_rate": 2.0721491279423246e-06, |
| "loss": 0.1716, |
| "step": 11540 |
| }, |
| { |
| "epoch": 3.649237816918095, |
| "grad_norm": 0.18698069166162326, |
| "learning_rate": 2.063215648823781e-06, |
| "loss": 0.1682, |
| "step": 11550 |
| }, |
| { |
| "epoch": 3.6523971250296183, |
| "grad_norm": 0.19593671814658578, |
| "learning_rate": 2.0542964596208344e-06, |
| "loss": 0.1704, |
| "step": 11560 |
| }, |
| { |
| "epoch": 3.6555564331411423, |
| "grad_norm": 0.18813913173493607, |
| "learning_rate": 2.0453916037328174e-06, |
| "loss": 0.1727, |
| "step": 11570 |
| }, |
| { |
| "epoch": 3.6587157412526654, |
| "grad_norm": 0.2012970901960924, |
| "learning_rate": 2.036501124489308e-06, |
| "loss": 0.1703, |
| "step": 11580 |
| }, |
| { |
| "epoch": 3.6618750493641894, |
| "grad_norm": 0.20717799227869307, |
| "learning_rate": 2.0276250651499346e-06, |
| "loss": 0.1706, |
| "step": 11590 |
| }, |
| { |
| "epoch": 3.665034357475713, |
| "grad_norm": 0.197037281134198, |
| "learning_rate": 2.0187634689041603e-06, |
| "loss": 0.1715, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.6681936655872365, |
| "grad_norm": 0.20136557601918076, |
| "learning_rate": 2.009916378871074e-06, |
| "loss": 0.1709, |
| "step": 11610 |
| }, |
| { |
| "epoch": 3.67135297369876, |
| "grad_norm": 0.20329881900316946, |
| "learning_rate": 2.0010838380991776e-06, |
| "loss": 0.1703, |
| "step": 11620 |
| }, |
| { |
| "epoch": 3.6745122818102836, |
| "grad_norm": 0.20458710965310886, |
| "learning_rate": 1.9922658895661816e-06, |
| "loss": 0.1715, |
| "step": 11630 |
| }, |
| { |
| "epoch": 3.677671589921807, |
| "grad_norm": 0.19784202689125777, |
| "learning_rate": 1.983462576178786e-06, |
| "loss": 0.1715, |
| "step": 11640 |
| }, |
| { |
| "epoch": 3.6808308980333306, |
| "grad_norm": 0.19664140408499303, |
| "learning_rate": 1.9746739407724913e-06, |
| "loss": 0.1707, |
| "step": 11650 |
| }, |
| { |
| "epoch": 3.683990206144854, |
| "grad_norm": 0.20357355393106222, |
| "learning_rate": 1.965900026111364e-06, |
| "loss": 0.1682, |
| "step": 11660 |
| }, |
| { |
| "epoch": 3.6871495142563777, |
| "grad_norm": 0.19933592060492888, |
| "learning_rate": 1.9571408748878495e-06, |
| "loss": 0.1688, |
| "step": 11670 |
| }, |
| { |
| "epoch": 3.6903088223679017, |
| "grad_norm": 0.19698317753323233, |
| "learning_rate": 1.9483965297225545e-06, |
| "loss": 0.1708, |
| "step": 11680 |
| }, |
| { |
| "epoch": 3.693468130479425, |
| "grad_norm": 0.19171609705724965, |
| "learning_rate": 1.9396670331640427e-06, |
| "loss": 0.1714, |
| "step": 11690 |
| }, |
| { |
| "epoch": 3.6966274385909488, |
| "grad_norm": 0.20128320688947327, |
| "learning_rate": 1.930952427688626e-06, |
| "loss": 0.1699, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.6997867467024723, |
| "grad_norm": 0.19818571115089004, |
| "learning_rate": 1.9222527557001587e-06, |
| "loss": 0.1726, |
| "step": 11710 |
| }, |
| { |
| "epoch": 3.702946054813996, |
| "grad_norm": 0.20461949225316534, |
| "learning_rate": 1.913568059529832e-06, |
| "loss": 0.1708, |
| "step": 11720 |
| }, |
| { |
| "epoch": 3.7061053629255194, |
| "grad_norm": 0.1969396016325418, |
| "learning_rate": 1.9048983814359684e-06, |
| "loss": 0.1726, |
| "step": 11730 |
| }, |
| { |
| "epoch": 3.709264671037043, |
| "grad_norm": 0.20508478156000626, |
| "learning_rate": 1.8962437636038095e-06, |
| "loss": 0.171, |
| "step": 11740 |
| }, |
| { |
| "epoch": 3.7124239791485665, |
| "grad_norm": 0.19143862052589067, |
| "learning_rate": 1.8876042481453222e-06, |
| "loss": 0.1703, |
| "step": 11750 |
| }, |
| { |
| "epoch": 3.71558328726009, |
| "grad_norm": 0.19462055997090028, |
| "learning_rate": 1.8789798770989841e-06, |
| "loss": 0.1695, |
| "step": 11760 |
| }, |
| { |
| "epoch": 3.7187425953716136, |
| "grad_norm": 0.20047619813511314, |
| "learning_rate": 1.870370692429585e-06, |
| "loss": 0.169, |
| "step": 11770 |
| }, |
| { |
| "epoch": 3.721901903483137, |
| "grad_norm": 0.1997610549023301, |
| "learning_rate": 1.8617767360280182e-06, |
| "loss": 0.1722, |
| "step": 11780 |
| }, |
| { |
| "epoch": 3.7250612115946606, |
| "grad_norm": 0.2040821932549548, |
| "learning_rate": 1.8531980497110803e-06, |
| "loss": 0.1715, |
| "step": 11790 |
| }, |
| { |
| "epoch": 3.728220519706184, |
| "grad_norm": 0.20953839611305236, |
| "learning_rate": 1.8446346752212662e-06, |
| "loss": 0.1723, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.731379827817708, |
| "grad_norm": 0.19797718998398306, |
| "learning_rate": 1.8360866542265626e-06, |
| "loss": 0.1683, |
| "step": 11810 |
| }, |
| { |
| "epoch": 3.7345391359292313, |
| "grad_norm": 0.20300080445572846, |
| "learning_rate": 1.827554028320252e-06, |
| "loss": 0.1714, |
| "step": 11820 |
| }, |
| { |
| "epoch": 3.7376984440407552, |
| "grad_norm": 0.19643175410416572, |
| "learning_rate": 1.8190368390207063e-06, |
| "loss": 0.1733, |
| "step": 11830 |
| }, |
| { |
| "epoch": 3.740857752152279, |
| "grad_norm": 0.1956191661879107, |
| "learning_rate": 1.8105351277711857e-06, |
| "loss": 0.1709, |
| "step": 11840 |
| }, |
| { |
| "epoch": 3.7440170602638023, |
| "grad_norm": 0.20191888545926198, |
| "learning_rate": 1.8020489359396353e-06, |
| "loss": 0.1726, |
| "step": 11850 |
| }, |
| { |
| "epoch": 3.747176368375326, |
| "grad_norm": 0.20203040508749906, |
| "learning_rate": 1.7935783048184868e-06, |
| "loss": 0.1709, |
| "step": 11860 |
| }, |
| { |
| "epoch": 3.7503356764868494, |
| "grad_norm": 0.19868848486501622, |
| "learning_rate": 1.7851232756244542e-06, |
| "loss": 0.171, |
| "step": 11870 |
| }, |
| { |
| "epoch": 3.753494984598373, |
| "grad_norm": 0.20747266365018838, |
| "learning_rate": 1.776683889498339e-06, |
| "loss": 0.1726, |
| "step": 11880 |
| }, |
| { |
| "epoch": 3.7566542927098965, |
| "grad_norm": 0.1977057128148197, |
| "learning_rate": 1.768260187504819e-06, |
| "loss": 0.1712, |
| "step": 11890 |
| }, |
| { |
| "epoch": 3.75981360082142, |
| "grad_norm": 0.20593578039273977, |
| "learning_rate": 1.7598522106322618e-06, |
| "loss": 0.1699, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.7629729089329436, |
| "grad_norm": 0.205692865090555, |
| "learning_rate": 1.751459999792517e-06, |
| "loss": 0.1693, |
| "step": 11910 |
| }, |
| { |
| "epoch": 3.766132217044467, |
| "grad_norm": 0.20536611729964754, |
| "learning_rate": 1.7430835958207188e-06, |
| "loss": 0.1695, |
| "step": 11920 |
| }, |
| { |
| "epoch": 3.7692915251559906, |
| "grad_norm": 0.20003626633545768, |
| "learning_rate": 1.734723039475089e-06, |
| "loss": 0.1707, |
| "step": 11930 |
| }, |
| { |
| "epoch": 3.7724508332675146, |
| "grad_norm": 0.19756511256291745, |
| "learning_rate": 1.7263783714367388e-06, |
| "loss": 0.1706, |
| "step": 11940 |
| }, |
| { |
| "epoch": 3.7756101413790377, |
| "grad_norm": 0.19369294547074006, |
| "learning_rate": 1.7180496323094609e-06, |
| "loss": 0.1727, |
| "step": 11950 |
| }, |
| { |
| "epoch": 3.7787694494905617, |
| "grad_norm": 0.19163574497757527, |
| "learning_rate": 1.7097368626195548e-06, |
| "loss": 0.1716, |
| "step": 11960 |
| }, |
| { |
| "epoch": 3.7819287576020852, |
| "grad_norm": 0.19945071023523384, |
| "learning_rate": 1.7014401028156003e-06, |
| "loss": 0.17, |
| "step": 11970 |
| }, |
| { |
| "epoch": 3.785088065713609, |
| "grad_norm": 0.1941150399201019, |
| "learning_rate": 1.6931593932682893e-06, |
| "loss": 0.1716, |
| "step": 11980 |
| }, |
| { |
| "epoch": 3.7882473738251323, |
| "grad_norm": 0.19627611530197875, |
| "learning_rate": 1.6848947742702048e-06, |
| "loss": 0.17, |
| "step": 11990 |
| }, |
| { |
| "epoch": 3.791406681936656, |
| "grad_norm": 0.1985759488262882, |
| "learning_rate": 1.6766462860356425e-06, |
| "loss": 0.1705, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.7945659900481794, |
| "grad_norm": 0.2084135568916697, |
| "learning_rate": 1.6684139687004052e-06, |
| "loss": 0.1703, |
| "step": 12010 |
| }, |
| { |
| "epoch": 3.797725298159703, |
| "grad_norm": 0.20212891125021623, |
| "learning_rate": 1.6601978623216126e-06, |
| "loss": 0.1719, |
| "step": 12020 |
| }, |
| { |
| "epoch": 3.8008846062712265, |
| "grad_norm": 0.20148662876122203, |
| "learning_rate": 1.6519980068775026e-06, |
| "loss": 0.1718, |
| "step": 12030 |
| }, |
| { |
| "epoch": 3.80404391438275, |
| "grad_norm": 0.19966908684766907, |
| "learning_rate": 1.643814442267243e-06, |
| "loss": 0.1703, |
| "step": 12040 |
| }, |
| { |
| "epoch": 3.807203222494274, |
| "grad_norm": 0.19758114437735802, |
| "learning_rate": 1.6356472083107239e-06, |
| "loss": 0.1704, |
| "step": 12050 |
| }, |
| { |
| "epoch": 3.810362530605797, |
| "grad_norm": 0.2043201919277624, |
| "learning_rate": 1.6274963447483855e-06, |
| "loss": 0.1709, |
| "step": 12060 |
| }, |
| { |
| "epoch": 3.813521838717321, |
| "grad_norm": 0.2065094204780346, |
| "learning_rate": 1.6193618912410019e-06, |
| "loss": 0.1719, |
| "step": 12070 |
| }, |
| { |
| "epoch": 3.8166811468288446, |
| "grad_norm": 0.19410241771240708, |
| "learning_rate": 1.611243887369503e-06, |
| "loss": 0.1699, |
| "step": 12080 |
| }, |
| { |
| "epoch": 3.819840454940368, |
| "grad_norm": 0.19582642907268447, |
| "learning_rate": 1.6031423726347778e-06, |
| "loss": 0.1703, |
| "step": 12090 |
| }, |
| { |
| "epoch": 3.8229997630518917, |
| "grad_norm": 0.1995506401309092, |
| "learning_rate": 1.5950573864574808e-06, |
| "loss": 0.1686, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.8261590711634152, |
| "grad_norm": 0.19809864311658273, |
| "learning_rate": 1.5869889681778411e-06, |
| "loss": 0.1705, |
| "step": 12110 |
| }, |
| { |
| "epoch": 3.829318379274939, |
| "grad_norm": 0.20274297131703675, |
| "learning_rate": 1.5789371570554729e-06, |
| "loss": 0.1727, |
| "step": 12120 |
| }, |
| { |
| "epoch": 3.8324776873864623, |
| "grad_norm": 0.20601567032087037, |
| "learning_rate": 1.570901992269177e-06, |
| "loss": 0.1693, |
| "step": 12130 |
| }, |
| { |
| "epoch": 3.835636995497986, |
| "grad_norm": 0.19456210693609077, |
| "learning_rate": 1.5628835129167662e-06, |
| "loss": 0.1701, |
| "step": 12140 |
| }, |
| { |
| "epoch": 3.8387963036095094, |
| "grad_norm": 0.20115987503568447, |
| "learning_rate": 1.5548817580148517e-06, |
| "loss": 0.1721, |
| "step": 12150 |
| }, |
| { |
| "epoch": 3.841955611721033, |
| "grad_norm": 0.1958665568478278, |
| "learning_rate": 1.54689676649868e-06, |
| "loss": 0.1707, |
| "step": 12160 |
| }, |
| { |
| "epoch": 3.8451149198325565, |
| "grad_norm": 0.20025006804402964, |
| "learning_rate": 1.5389285772219176e-06, |
| "loss": 0.1702, |
| "step": 12170 |
| }, |
| { |
| "epoch": 3.8482742279440805, |
| "grad_norm": 0.20824644715314478, |
| "learning_rate": 1.5309772289564806e-06, |
| "loss": 0.1713, |
| "step": 12180 |
| }, |
| { |
| "epoch": 3.8514335360556036, |
| "grad_norm": 0.20543389482537658, |
| "learning_rate": 1.5230427603923386e-06, |
| "loss": 0.1714, |
| "step": 12190 |
| }, |
| { |
| "epoch": 3.8545928441671276, |
| "grad_norm": 0.201515908971831, |
| "learning_rate": 1.5151252101373266e-06, |
| "loss": 0.1729, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.857752152278651, |
| "grad_norm": 0.2016189641289871, |
| "learning_rate": 1.5072246167169574e-06, |
| "loss": 0.1701, |
| "step": 12210 |
| }, |
| { |
| "epoch": 3.8609114603901746, |
| "grad_norm": 0.19876889094407768, |
| "learning_rate": 1.4993410185742374e-06, |
| "loss": 0.1689, |
| "step": 12220 |
| }, |
| { |
| "epoch": 3.864070768501698, |
| "grad_norm": 0.20500710077419468, |
| "learning_rate": 1.4914744540694697e-06, |
| "loss": 0.1714, |
| "step": 12230 |
| }, |
| { |
| "epoch": 3.8672300766132217, |
| "grad_norm": 0.20129831840042556, |
| "learning_rate": 1.4836249614800857e-06, |
| "loss": 0.1706, |
| "step": 12240 |
| }, |
| { |
| "epoch": 3.8703893847247453, |
| "grad_norm": 0.1945834822439153, |
| "learning_rate": 1.4757925790004362e-06, |
| "loss": 0.1709, |
| "step": 12250 |
| }, |
| { |
| "epoch": 3.873548692836269, |
| "grad_norm": 0.1987155834635916, |
| "learning_rate": 1.467977344741624e-06, |
| "loss": 0.1717, |
| "step": 12260 |
| }, |
| { |
| "epoch": 3.8767080009477923, |
| "grad_norm": 0.21759266570145575, |
| "learning_rate": 1.4601792967313095e-06, |
| "loss": 0.1712, |
| "step": 12270 |
| }, |
| { |
| "epoch": 3.879867309059316, |
| "grad_norm": 0.20382637953104524, |
| "learning_rate": 1.4523984729135272e-06, |
| "loss": 0.1714, |
| "step": 12280 |
| }, |
| { |
| "epoch": 3.88302661717084, |
| "grad_norm": 0.1916842281105115, |
| "learning_rate": 1.444634911148502e-06, |
| "loss": 0.1692, |
| "step": 12290 |
| }, |
| { |
| "epoch": 3.886185925282363, |
| "grad_norm": 0.1946183364574122, |
| "learning_rate": 1.4368886492124661e-06, |
| "loss": 0.1699, |
| "step": 12300 |
| }, |
| { |
| "epoch": 3.889345233393887, |
| "grad_norm": 0.19920320984504467, |
| "learning_rate": 1.429159724797467e-06, |
| "loss": 0.1697, |
| "step": 12310 |
| }, |
| { |
| "epoch": 3.8925045415054105, |
| "grad_norm": 0.19953879740403313, |
| "learning_rate": 1.421448175511202e-06, |
| "loss": 0.1694, |
| "step": 12320 |
| }, |
| { |
| "epoch": 3.895663849616934, |
| "grad_norm": 0.20468537426685904, |
| "learning_rate": 1.4137540388768107e-06, |
| "loss": 0.1722, |
| "step": 12330 |
| }, |
| { |
| "epoch": 3.8988231577284576, |
| "grad_norm": 0.20676153968954222, |
| "learning_rate": 1.4060773523327175e-06, |
| "loss": 0.173, |
| "step": 12340 |
| }, |
| { |
| "epoch": 3.901982465839981, |
| "grad_norm": 0.21412532800510253, |
| "learning_rate": 1.3984181532324291e-06, |
| "loss": 0.17, |
| "step": 12350 |
| }, |
| { |
| "epoch": 3.9051417739515046, |
| "grad_norm": 0.20530217895484593, |
| "learning_rate": 1.3907764788443651e-06, |
| "loss": 0.1718, |
| "step": 12360 |
| }, |
| { |
| "epoch": 3.908301082063028, |
| "grad_norm": 0.19397608625457688, |
| "learning_rate": 1.383152366351671e-06, |
| "loss": 0.171, |
| "step": 12370 |
| }, |
| { |
| "epoch": 3.9114603901745517, |
| "grad_norm": 0.19792598868494216, |
| "learning_rate": 1.3755458528520422e-06, |
| "loss": 0.1691, |
| "step": 12380 |
| }, |
| { |
| "epoch": 3.9146196982860753, |
| "grad_norm": 0.19530536153646347, |
| "learning_rate": 1.3679569753575321e-06, |
| "loss": 0.1713, |
| "step": 12390 |
| }, |
| { |
| "epoch": 3.917779006397599, |
| "grad_norm": 0.19937572369371176, |
| "learning_rate": 1.3603857707943934e-06, |
| "loss": 0.1718, |
| "step": 12400 |
| }, |
| { |
| "epoch": 3.9209383145091223, |
| "grad_norm": 0.2045657222636314, |
| "learning_rate": 1.3528322760028706e-06, |
| "loss": 0.1705, |
| "step": 12410 |
| }, |
| { |
| "epoch": 3.9240976226206463, |
| "grad_norm": 0.19538054890753861, |
| "learning_rate": 1.345296527737049e-06, |
| "loss": 0.17, |
| "step": 12420 |
| }, |
| { |
| "epoch": 3.9272569307321694, |
| "grad_norm": 0.19898434578570567, |
| "learning_rate": 1.3377785626646505e-06, |
| "loss": 0.1708, |
| "step": 12430 |
| }, |
| { |
| "epoch": 3.9304162388436934, |
| "grad_norm": 0.20793904574642982, |
| "learning_rate": 1.3302784173668732e-06, |
| "loss": 0.17, |
| "step": 12440 |
| }, |
| { |
| "epoch": 3.933575546955217, |
| "grad_norm": 0.20762680354960933, |
| "learning_rate": 1.322796128338207e-06, |
| "loss": 0.1724, |
| "step": 12450 |
| }, |
| { |
| "epoch": 3.9367348550667405, |
| "grad_norm": 0.2018512947411419, |
| "learning_rate": 1.315331731986253e-06, |
| "loss": 0.1695, |
| "step": 12460 |
| }, |
| { |
| "epoch": 3.939894163178264, |
| "grad_norm": 0.19579741223203112, |
| "learning_rate": 1.3078852646315532e-06, |
| "loss": 0.1718, |
| "step": 12470 |
| }, |
| { |
| "epoch": 3.9430534712897876, |
| "grad_norm": 0.20067280716130292, |
| "learning_rate": 1.3004567625074083e-06, |
| "loss": 0.1701, |
| "step": 12480 |
| }, |
| { |
| "epoch": 3.946212779401311, |
| "grad_norm": 0.19388666686105327, |
| "learning_rate": 1.2930462617596996e-06, |
| "loss": 0.1711, |
| "step": 12490 |
| }, |
| { |
| "epoch": 3.9493720875128346, |
| "grad_norm": 0.19875001659515246, |
| "learning_rate": 1.285653798446725e-06, |
| "loss": 0.1725, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.952531395624358, |
| "grad_norm": 0.20497115291022464, |
| "learning_rate": 1.278279408539006e-06, |
| "loss": 0.1699, |
| "step": 12510 |
| }, |
| { |
| "epoch": 3.9556907037358817, |
| "grad_norm": 0.19011102711328773, |
| "learning_rate": 1.270923127919128e-06, |
| "loss": 0.1697, |
| "step": 12520 |
| }, |
| { |
| "epoch": 3.9588500118474053, |
| "grad_norm": 0.20042416730472096, |
| "learning_rate": 1.2635849923815562e-06, |
| "loss": 0.1711, |
| "step": 12530 |
| }, |
| { |
| "epoch": 3.962009319958929, |
| "grad_norm": 0.19397058835275352, |
| "learning_rate": 1.2562650376324675e-06, |
| "loss": 0.1715, |
| "step": 12540 |
| }, |
| { |
| "epoch": 3.965168628070453, |
| "grad_norm": 0.20187863747529436, |
| "learning_rate": 1.2489632992895722e-06, |
| "loss": 0.173, |
| "step": 12550 |
| }, |
| { |
| "epoch": 3.968327936181976, |
| "grad_norm": 0.19498840804702408, |
| "learning_rate": 1.2416798128819446e-06, |
| "loss": 0.1699, |
| "step": 12560 |
| }, |
| { |
| "epoch": 3.9714872442935, |
| "grad_norm": 0.1971626109701413, |
| "learning_rate": 1.2344146138498414e-06, |
| "loss": 0.1707, |
| "step": 12570 |
| }, |
| { |
| "epoch": 3.9746465524050234, |
| "grad_norm": 0.19237195141765748, |
| "learning_rate": 1.2271677375445474e-06, |
| "loss": 0.1723, |
| "step": 12580 |
| }, |
| { |
| "epoch": 3.977805860516547, |
| "grad_norm": 0.19369835876699062, |
| "learning_rate": 1.2199392192281805e-06, |
| "loss": 0.1722, |
| "step": 12590 |
| }, |
| { |
| "epoch": 3.9809651686280705, |
| "grad_norm": 0.20431461072784543, |
| "learning_rate": 1.2127290940735387e-06, |
| "loss": 0.1688, |
| "step": 12600 |
| }, |
| { |
| "epoch": 3.984124476739594, |
| "grad_norm": 0.20529642623566896, |
| "learning_rate": 1.2055373971639195e-06, |
| "loss": 0.168, |
| "step": 12610 |
| }, |
| { |
| "epoch": 3.9872837848511176, |
| "grad_norm": 0.19084232186590003, |
| "learning_rate": 1.1983641634929522e-06, |
| "loss": 0.17, |
| "step": 12620 |
| }, |
| { |
| "epoch": 3.990443092962641, |
| "grad_norm": 0.20780766829176076, |
| "learning_rate": 1.1912094279644265e-06, |
| "loss": 0.1679, |
| "step": 12630 |
| }, |
| { |
| "epoch": 3.9936024010741646, |
| "grad_norm": 0.19468886303948824, |
| "learning_rate": 1.1840732253921227e-06, |
| "loss": 0.1686, |
| "step": 12640 |
| }, |
| { |
| "epoch": 3.996761709185688, |
| "grad_norm": 0.19648890342838896, |
| "learning_rate": 1.1769555904996454e-06, |
| "loss": 0.1704, |
| "step": 12650 |
| }, |
| { |
| "epoch": 3.999921017297212, |
| "grad_norm": 0.19284335982757503, |
| "learning_rate": 1.1698565579202465e-06, |
| "loss": 0.1704, |
| "step": 12660 |
| }, |
| { |
| "epoch": 4.002843377300371, |
| "grad_norm": 0.19754930089679965, |
| "learning_rate": 1.1627761621966671e-06, |
| "loss": 0.1487, |
| "step": 12670 |
| }, |
| { |
| "epoch": 4.006002685411895, |
| "grad_norm": 0.19520779806174013, |
| "learning_rate": 1.1557144377809626e-06, |
| "loss": 0.1588, |
| "step": 12680 |
| }, |
| { |
| "epoch": 4.009161993523418, |
| "grad_norm": 0.19859317904582857, |
| "learning_rate": 1.1486714190343367e-06, |
| "loss": 0.1596, |
| "step": 12690 |
| }, |
| { |
| "epoch": 4.012321301634942, |
| "grad_norm": 0.20028049175205914, |
| "learning_rate": 1.1416471402269747e-06, |
| "loss": 0.1581, |
| "step": 12700 |
| }, |
| { |
| "epoch": 4.015480609746466, |
| "grad_norm": 0.18619306242942488, |
| "learning_rate": 1.1346416355378764e-06, |
| "loss": 0.1598, |
| "step": 12710 |
| }, |
| { |
| "epoch": 4.018639917857989, |
| "grad_norm": 0.19084584057427986, |
| "learning_rate": 1.1276549390546893e-06, |
| "loss": 0.1598, |
| "step": 12720 |
| }, |
| { |
| "epoch": 4.021799225969513, |
| "grad_norm": 0.1936540709695895, |
| "learning_rate": 1.120687084773545e-06, |
| "loss": 0.159, |
| "step": 12730 |
| }, |
| { |
| "epoch": 4.024958534081036, |
| "grad_norm": 0.1975921539537816, |
| "learning_rate": 1.1137381065988878e-06, |
| "loss": 0.1583, |
| "step": 12740 |
| }, |
| { |
| "epoch": 4.02811784219256, |
| "grad_norm": 0.19969147674976237, |
| "learning_rate": 1.1068080383433188e-06, |
| "loss": 0.1602, |
| "step": 12750 |
| }, |
| { |
| "epoch": 4.031277150304083, |
| "grad_norm": 0.19352808568437072, |
| "learning_rate": 1.0998969137274234e-06, |
| "loss": 0.1597, |
| "step": 12760 |
| }, |
| { |
| "epoch": 4.034436458415607, |
| "grad_norm": 0.19339061581403172, |
| "learning_rate": 1.0930047663796117e-06, |
| "loss": 0.1618, |
| "step": 12770 |
| }, |
| { |
| "epoch": 4.03759576652713, |
| "grad_norm": 0.19477299915795446, |
| "learning_rate": 1.0861316298359537e-06, |
| "loss": 0.1584, |
| "step": 12780 |
| }, |
| { |
| "epoch": 4.040755074638654, |
| "grad_norm": 0.19747662712691139, |
| "learning_rate": 1.0792775375400143e-06, |
| "loss": 0.1598, |
| "step": 12790 |
| }, |
| { |
| "epoch": 4.043914382750177, |
| "grad_norm": 0.19362010628396598, |
| "learning_rate": 1.0724425228426938e-06, |
| "loss": 0.1609, |
| "step": 12800 |
| }, |
| { |
| "epoch": 4.047073690861701, |
| "grad_norm": 0.1978158624641635, |
| "learning_rate": 1.0656266190020648e-06, |
| "loss": 0.1604, |
| "step": 12810 |
| }, |
| { |
| "epoch": 4.050232998973225, |
| "grad_norm": 0.2017835581795631, |
| "learning_rate": 1.058829859183204e-06, |
| "loss": 0.1595, |
| "step": 12820 |
| }, |
| { |
| "epoch": 4.053392307084748, |
| "grad_norm": 0.19505342405207957, |
| "learning_rate": 1.0520522764580466e-06, |
| "loss": 0.1601, |
| "step": 12830 |
| }, |
| { |
| "epoch": 4.056551615196272, |
| "grad_norm": 0.19659192583114876, |
| "learning_rate": 1.0452939038052045e-06, |
| "loss": 0.1582, |
| "step": 12840 |
| }, |
| { |
| "epoch": 4.0597109233077955, |
| "grad_norm": 0.19062889408725014, |
| "learning_rate": 1.0385547741098222e-06, |
| "loss": 0.1594, |
| "step": 12850 |
| }, |
| { |
| "epoch": 4.0628702314193195, |
| "grad_norm": 0.2007575871127392, |
| "learning_rate": 1.0318349201634116e-06, |
| "loss": 0.1609, |
| "step": 12860 |
| }, |
| { |
| "epoch": 4.066029539530843, |
| "grad_norm": 0.19407495103479086, |
| "learning_rate": 1.02513437466369e-06, |
| "loss": 0.1601, |
| "step": 12870 |
| }, |
| { |
| "epoch": 4.0691888476423665, |
| "grad_norm": 0.1926256014241401, |
| "learning_rate": 1.01845317021442e-06, |
| "loss": 0.1597, |
| "step": 12880 |
| }, |
| { |
| "epoch": 4.07234815575389, |
| "grad_norm": 0.19084665134324094, |
| "learning_rate": 1.0117913393252632e-06, |
| "loss": 0.1605, |
| "step": 12890 |
| }, |
| { |
| "epoch": 4.075507463865414, |
| "grad_norm": 0.19709574860841494, |
| "learning_rate": 1.0051489144116e-06, |
| "loss": 0.1608, |
| "step": 12900 |
| }, |
| { |
| "epoch": 4.078666771976937, |
| "grad_norm": 0.20095251133089448, |
| "learning_rate": 9.985259277943977e-07, |
| "loss": 0.1602, |
| "step": 12910 |
| }, |
| { |
| "epoch": 4.081826080088461, |
| "grad_norm": 0.19685311750698353, |
| "learning_rate": 9.919224117000281e-07, |
| "loss": 0.1614, |
| "step": 12920 |
| }, |
| { |
| "epoch": 4.084985388199984, |
| "grad_norm": 0.20241005189325584, |
| "learning_rate": 9.853383982601294e-07, |
| "loss": 0.1596, |
| "step": 12930 |
| }, |
| { |
| "epoch": 4.088144696311508, |
| "grad_norm": 0.19400175840515052, |
| "learning_rate": 9.787739195114427e-07, |
| "loss": 0.1592, |
| "step": 12940 |
| }, |
| { |
| "epoch": 4.091304004423032, |
| "grad_norm": 0.20303783315409038, |
| "learning_rate": 9.722290073956536e-07, |
| "loss": 0.1597, |
| "step": 12950 |
| }, |
| { |
| "epoch": 4.094463312534555, |
| "grad_norm": 0.1936600304781095, |
| "learning_rate": 9.657036937592423e-07, |
| "loss": 0.1621, |
| "step": 12960 |
| }, |
| { |
| "epoch": 4.097622620646079, |
| "grad_norm": 0.19796367201440496, |
| "learning_rate": 9.59198010353326e-07, |
| "loss": 0.1597, |
| "step": 12970 |
| }, |
| { |
| "epoch": 4.100781928757602, |
| "grad_norm": 0.18342546130725573, |
| "learning_rate": 9.527119888334996e-07, |
| "loss": 0.1582, |
| "step": 12980 |
| }, |
| { |
| "epoch": 4.103941236869126, |
| "grad_norm": 0.20317670235586452, |
| "learning_rate": 9.462456607596954e-07, |
| "loss": 0.1603, |
| "step": 12990 |
| }, |
| { |
| "epoch": 4.107100544980649, |
| "grad_norm": 0.19569726312873856, |
| "learning_rate": 9.397990575960103e-07, |
| "loss": 0.1578, |
| "step": 13000 |
| }, |
| { |
| "epoch": 4.110259853092173, |
| "grad_norm": 0.20338486160541247, |
| "learning_rate": 9.333722107105725e-07, |
| "loss": 0.1606, |
| "step": 13010 |
| }, |
| { |
| "epoch": 4.113419161203696, |
| "grad_norm": 0.1875071227775237, |
| "learning_rate": 9.269651513753725e-07, |
| "loss": 0.1603, |
| "step": 13020 |
| }, |
| { |
| "epoch": 4.11657846931522, |
| "grad_norm": 0.1927619122892459, |
| "learning_rate": 9.205779107661201e-07, |
| "loss": 0.1581, |
| "step": 13030 |
| }, |
| { |
| "epoch": 4.119737777426743, |
| "grad_norm": 0.20452574688837152, |
| "learning_rate": 9.142105199620916e-07, |
| "loss": 0.159, |
| "step": 13040 |
| }, |
| { |
| "epoch": 4.122897085538267, |
| "grad_norm": 0.2023565306937573, |
| "learning_rate": 9.078630099459768e-07, |
| "loss": 0.1604, |
| "step": 13050 |
| }, |
| { |
| "epoch": 4.12605639364979, |
| "grad_norm": 0.19951162187159371, |
| "learning_rate": 9.015354116037256e-07, |
| "loss": 0.158, |
| "step": 13060 |
| }, |
| { |
| "epoch": 4.129215701761314, |
| "grad_norm": 0.19668566056760164, |
| "learning_rate": 8.952277557244077e-07, |
| "loss": 0.1589, |
| "step": 13070 |
| }, |
| { |
| "epoch": 4.132375009872838, |
| "grad_norm": 0.18983274506050818, |
| "learning_rate": 8.889400730000475e-07, |
| "loss": 0.1599, |
| "step": 13080 |
| }, |
| { |
| "epoch": 4.135534317984361, |
| "grad_norm": 0.19379806465727592, |
| "learning_rate": 8.826723940254923e-07, |
| "loss": 0.1614, |
| "step": 13090 |
| }, |
| { |
| "epoch": 4.138693626095885, |
| "grad_norm": 0.2015655061435355, |
| "learning_rate": 8.76424749298247e-07, |
| "loss": 0.1596, |
| "step": 13100 |
| }, |
| { |
| "epoch": 4.141852934207408, |
| "grad_norm": 0.20253336827566704, |
| "learning_rate": 8.701971692183365e-07, |
| "loss": 0.1605, |
| "step": 13110 |
| }, |
| { |
| "epoch": 4.145012242318932, |
| "grad_norm": 0.19563497160996118, |
| "learning_rate": 8.639896840881534e-07, |
| "loss": 0.1607, |
| "step": 13120 |
| }, |
| { |
| "epoch": 4.1481715504304555, |
| "grad_norm": 0.20060195439697434, |
| "learning_rate": 8.578023241123134e-07, |
| "loss": 0.16, |
| "step": 13130 |
| }, |
| { |
| "epoch": 4.1513308585419795, |
| "grad_norm": 0.19835862074182956, |
| "learning_rate": 8.516351193975042e-07, |
| "loss": 0.1631, |
| "step": 13140 |
| }, |
| { |
| "epoch": 4.154490166653503, |
| "grad_norm": 0.1869510365958818, |
| "learning_rate": 8.454880999523435e-07, |
| "loss": 0.1587, |
| "step": 13150 |
| }, |
| { |
| "epoch": 4.1576494747650266, |
| "grad_norm": 0.20004584908939843, |
| "learning_rate": 8.393612956872254e-07, |
| "loss": 0.1621, |
| "step": 13160 |
| }, |
| { |
| "epoch": 4.16080878287655, |
| "grad_norm": 0.1856765556024912, |
| "learning_rate": 8.332547364141891e-07, |
| "loss": 0.159, |
| "step": 13170 |
| }, |
| { |
| "epoch": 4.163968090988074, |
| "grad_norm": 0.19589805621856057, |
| "learning_rate": 8.271684518467571e-07, |
| "loss": 0.1602, |
| "step": 13180 |
| }, |
| { |
| "epoch": 4.167127399099598, |
| "grad_norm": 0.19689293762125867, |
| "learning_rate": 8.211024715998023e-07, |
| "loss": 0.1591, |
| "step": 13190 |
| }, |
| { |
| "epoch": 4.170286707211121, |
| "grad_norm": 0.19503964405624044, |
| "learning_rate": 8.150568251893992e-07, |
| "loss": 0.1604, |
| "step": 13200 |
| }, |
| { |
| "epoch": 4.173446015322645, |
| "grad_norm": 0.1932945597906981, |
| "learning_rate": 8.09031542032681e-07, |
| "loss": 0.1596, |
| "step": 13210 |
| }, |
| { |
| "epoch": 4.176605323434168, |
| "grad_norm": 0.1954697082605763, |
| "learning_rate": 8.030266514476976e-07, |
| "loss": 0.1596, |
| "step": 13220 |
| }, |
| { |
| "epoch": 4.179764631545692, |
| "grad_norm": 0.18739853815459867, |
| "learning_rate": 7.97042182653271e-07, |
| "loss": 0.1611, |
| "step": 13230 |
| }, |
| { |
| "epoch": 4.182923939657215, |
| "grad_norm": 0.2008506504946661, |
| "learning_rate": 7.910781647688515e-07, |
| "loss": 0.1594, |
| "step": 13240 |
| }, |
| { |
| "epoch": 4.186083247768739, |
| "grad_norm": 0.1997520617477428, |
| "learning_rate": 7.851346268143861e-07, |
| "loss": 0.1594, |
| "step": 13250 |
| }, |
| { |
| "epoch": 4.189242555880262, |
| "grad_norm": 0.19380452684319097, |
| "learning_rate": 7.7921159771016e-07, |
| "loss": 0.1608, |
| "step": 13260 |
| }, |
| { |
| "epoch": 4.192401863991786, |
| "grad_norm": 0.19204023557075717, |
| "learning_rate": 7.733091062766751e-07, |
| "loss": 0.1603, |
| "step": 13270 |
| }, |
| { |
| "epoch": 4.195561172103309, |
| "grad_norm": 0.1963780227663788, |
| "learning_rate": 7.674271812344935e-07, |
| "loss": 0.1581, |
| "step": 13280 |
| }, |
| { |
| "epoch": 4.198720480214833, |
| "grad_norm": 0.18775094836052963, |
| "learning_rate": 7.615658512041068e-07, |
| "loss": 0.1585, |
| "step": 13290 |
| }, |
| { |
| "epoch": 4.201879788326356, |
| "grad_norm": 0.1879252136754587, |
| "learning_rate": 7.557251447057962e-07, |
| "loss": 0.16, |
| "step": 13300 |
| }, |
| { |
| "epoch": 4.20503909643788, |
| "grad_norm": 0.19489071033267957, |
| "learning_rate": 7.499050901594896e-07, |
| "loss": 0.1587, |
| "step": 13310 |
| }, |
| { |
| "epoch": 4.208198404549404, |
| "grad_norm": 0.20273324039262924, |
| "learning_rate": 7.441057158846276e-07, |
| "loss": 0.1591, |
| "step": 13320 |
| }, |
| { |
| "epoch": 4.211357712660927, |
| "grad_norm": 0.1982379447248168, |
| "learning_rate": 7.383270501000245e-07, |
| "loss": 0.1599, |
| "step": 13330 |
| }, |
| { |
| "epoch": 4.214517020772451, |
| "grad_norm": 0.1856228450158758, |
| "learning_rate": 7.325691209237251e-07, |
| "loss": 0.1581, |
| "step": 13340 |
| }, |
| { |
| "epoch": 4.217676328883974, |
| "grad_norm": 0.19437453651795136, |
| "learning_rate": 7.268319563728831e-07, |
| "loss": 0.1586, |
| "step": 13350 |
| }, |
| { |
| "epoch": 4.220835636995498, |
| "grad_norm": 0.18694690942616607, |
| "learning_rate": 7.211155843636059e-07, |
| "loss": 0.1603, |
| "step": 13360 |
| }, |
| { |
| "epoch": 4.223994945107021, |
| "grad_norm": 0.19393003030705352, |
| "learning_rate": 7.154200327108313e-07, |
| "loss": 0.162, |
| "step": 13370 |
| }, |
| { |
| "epoch": 4.227154253218545, |
| "grad_norm": 0.1927448263517383, |
| "learning_rate": 7.097453291281887e-07, |
| "loss": 0.1612, |
| "step": 13380 |
| }, |
| { |
| "epoch": 4.230313561330068, |
| "grad_norm": 0.20215663142647716, |
| "learning_rate": 7.040915012278648e-07, |
| "loss": 0.1589, |
| "step": 13390 |
| }, |
| { |
| "epoch": 4.233472869441592, |
| "grad_norm": 0.21545803120847262, |
| "learning_rate": 6.984585765204665e-07, |
| "loss": 0.16, |
| "step": 13400 |
| }, |
| { |
| "epoch": 4.2366321775531155, |
| "grad_norm": 0.20075406426026432, |
| "learning_rate": 6.928465824148923e-07, |
| "loss": 0.1594, |
| "step": 13410 |
| }, |
| { |
| "epoch": 4.2397914856646395, |
| "grad_norm": 0.2394877373371559, |
| "learning_rate": 6.872555462181907e-07, |
| "loss": 0.1592, |
| "step": 13420 |
| }, |
| { |
| "epoch": 4.2429507937761635, |
| "grad_norm": 0.19216510992720218, |
| "learning_rate": 6.816854951354396e-07, |
| "loss": 0.1573, |
| "step": 13430 |
| }, |
| { |
| "epoch": 4.246110101887687, |
| "grad_norm": 0.2020202616715345, |
| "learning_rate": 6.761364562695993e-07, |
| "loss": 0.161, |
| "step": 13440 |
| }, |
| { |
| "epoch": 4.2492694099992105, |
| "grad_norm": 0.189014469828388, |
| "learning_rate": 6.706084566213933e-07, |
| "loss": 0.1589, |
| "step": 13450 |
| }, |
| { |
| "epoch": 4.252428718110734, |
| "grad_norm": 0.19279477497914185, |
| "learning_rate": 6.651015230891694e-07, |
| "loss": 0.1608, |
| "step": 13460 |
| }, |
| { |
| "epoch": 4.255588026222258, |
| "grad_norm": 0.20318784264174414, |
| "learning_rate": 6.596156824687722e-07, |
| "loss": 0.1596, |
| "step": 13470 |
| }, |
| { |
| "epoch": 4.258747334333781, |
| "grad_norm": 0.19924677539875918, |
| "learning_rate": 6.541509614534103e-07, |
| "loss": 0.1593, |
| "step": 13480 |
| }, |
| { |
| "epoch": 4.261906642445305, |
| "grad_norm": 0.20657626622701158, |
| "learning_rate": 6.487073866335298e-07, |
| "loss": 0.1598, |
| "step": 13490 |
| }, |
| { |
| "epoch": 4.265065950556828, |
| "grad_norm": 0.18954862382773452, |
| "learning_rate": 6.432849844966782e-07, |
| "loss": 0.1607, |
| "step": 13500 |
| }, |
| { |
| "epoch": 4.268225258668352, |
| "grad_norm": 0.20096767940062027, |
| "learning_rate": 6.378837814273886e-07, |
| "loss": 0.1602, |
| "step": 13510 |
| }, |
| { |
| "epoch": 4.271384566779875, |
| "grad_norm": 0.19873738285793385, |
| "learning_rate": 6.325038037070336e-07, |
| "loss": 0.1602, |
| "step": 13520 |
| }, |
| { |
| "epoch": 4.274543874891399, |
| "grad_norm": 0.2026478071540902, |
| "learning_rate": 6.271450775137116e-07, |
| "loss": 0.1579, |
| "step": 13530 |
| }, |
| { |
| "epoch": 4.277703183002922, |
| "grad_norm": 0.19853177565270944, |
| "learning_rate": 6.218076289221153e-07, |
| "loss": 0.1598, |
| "step": 13540 |
| }, |
| { |
| "epoch": 4.280862491114446, |
| "grad_norm": 0.20829772712743705, |
| "learning_rate": 6.164914839034008e-07, |
| "loss": 0.1587, |
| "step": 13550 |
| }, |
| { |
| "epoch": 4.28402179922597, |
| "grad_norm": 0.196256766990733, |
| "learning_rate": 6.111966683250681e-07, |
| "loss": 0.1604, |
| "step": 13560 |
| }, |
| { |
| "epoch": 4.287181107337493, |
| "grad_norm": 0.19618426976460837, |
| "learning_rate": 6.059232079508276e-07, |
| "loss": 0.1603, |
| "step": 13570 |
| }, |
| { |
| "epoch": 4.290340415449017, |
| "grad_norm": 0.202438781233527, |
| "learning_rate": 6.006711284404837e-07, |
| "loss": 0.1612, |
| "step": 13580 |
| }, |
| { |
| "epoch": 4.29349972356054, |
| "grad_norm": 0.19704572347653887, |
| "learning_rate": 5.954404553497989e-07, |
| "loss": 0.1602, |
| "step": 13590 |
| }, |
| { |
| "epoch": 4.296659031672064, |
| "grad_norm": 0.19168113950072857, |
| "learning_rate": 5.902312141303806e-07, |
| "loss": 0.1604, |
| "step": 13600 |
| }, |
| { |
| "epoch": 4.299818339783587, |
| "grad_norm": 0.2003879313833993, |
| "learning_rate": 5.850434301295494e-07, |
| "loss": 0.1596, |
| "step": 13610 |
| }, |
| { |
| "epoch": 4.302977647895111, |
| "grad_norm": 0.19759760667562565, |
| "learning_rate": 5.798771285902205e-07, |
| "loss": 0.1604, |
| "step": 13620 |
| }, |
| { |
| "epoch": 4.306136956006634, |
| "grad_norm": 0.19863833857948981, |
| "learning_rate": 5.747323346507777e-07, |
| "loss": 0.1592, |
| "step": 13630 |
| }, |
| { |
| "epoch": 4.309296264118158, |
| "grad_norm": 0.2024378958773524, |
| "learning_rate": 5.696090733449528e-07, |
| "loss": 0.1601, |
| "step": 13640 |
| }, |
| { |
| "epoch": 4.312455572229681, |
| "grad_norm": 0.20215707871180005, |
| "learning_rate": 5.645073696017028e-07, |
| "loss": 0.1585, |
| "step": 13650 |
| }, |
| { |
| "epoch": 4.315614880341205, |
| "grad_norm": 0.18689645886289935, |
| "learning_rate": 5.594272482450902e-07, |
| "loss": 0.1573, |
| "step": 13660 |
| }, |
| { |
| "epoch": 4.318774188452728, |
| "grad_norm": 0.2053319708006727, |
| "learning_rate": 5.543687339941584e-07, |
| "loss": 0.1615, |
| "step": 13670 |
| }, |
| { |
| "epoch": 4.321933496564252, |
| "grad_norm": 0.19171061220997784, |
| "learning_rate": 5.493318514628171e-07, |
| "loss": 0.1616, |
| "step": 13680 |
| }, |
| { |
| "epoch": 4.325092804675776, |
| "grad_norm": 0.19563207560203782, |
| "learning_rate": 5.443166251597187e-07, |
| "loss": 0.16, |
| "step": 13690 |
| }, |
| { |
| "epoch": 4.3282521127872995, |
| "grad_norm": 0.19760551499634138, |
| "learning_rate": 5.393230794881399e-07, |
| "loss": 0.1587, |
| "step": 13700 |
| }, |
| { |
| "epoch": 4.3314114208988235, |
| "grad_norm": 0.1919192060783246, |
| "learning_rate": 5.343512387458621e-07, |
| "loss": 0.1598, |
| "step": 13710 |
| }, |
| { |
| "epoch": 4.334570729010347, |
| "grad_norm": 0.19916105439716603, |
| "learning_rate": 5.294011271250549e-07, |
| "loss": 0.1581, |
| "step": 13720 |
| }, |
| { |
| "epoch": 4.3377300371218706, |
| "grad_norm": 0.1924623858324796, |
| "learning_rate": 5.244727687121581e-07, |
| "loss": 0.1585, |
| "step": 13730 |
| }, |
| { |
| "epoch": 4.340889345233394, |
| "grad_norm": 0.19222422456276647, |
| "learning_rate": 5.195661874877633e-07, |
| "loss": 0.1585, |
| "step": 13740 |
| }, |
| { |
| "epoch": 4.344048653344918, |
| "grad_norm": 0.1925599596150807, |
| "learning_rate": 5.14681407326495e-07, |
| "loss": 0.161, |
| "step": 13750 |
| }, |
| { |
| "epoch": 4.347207961456441, |
| "grad_norm": 0.1950428531940909, |
| "learning_rate": 5.098184519969041e-07, |
| "loss": 0.1581, |
| "step": 13760 |
| }, |
| { |
| "epoch": 4.350367269567965, |
| "grad_norm": 0.20354980424119892, |
| "learning_rate": 5.049773451613382e-07, |
| "loss": 0.1607, |
| "step": 13770 |
| }, |
| { |
| "epoch": 4.353526577679488, |
| "grad_norm": 0.1868628417947928, |
| "learning_rate": 5.001581103758374e-07, |
| "loss": 0.1601, |
| "step": 13780 |
| }, |
| { |
| "epoch": 4.356685885791012, |
| "grad_norm": 0.19984754637012053, |
| "learning_rate": 4.95360771090016e-07, |
| "loss": 0.159, |
| "step": 13790 |
| }, |
| { |
| "epoch": 4.359845193902535, |
| "grad_norm": 0.19703967834397978, |
| "learning_rate": 4.905853506469477e-07, |
| "loss": 0.159, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.363004502014059, |
| "grad_norm": 0.18977597668459417, |
| "learning_rate": 4.858318722830518e-07, |
| "loss": 0.1583, |
| "step": 13810 |
| }, |
| { |
| "epoch": 4.366163810125583, |
| "grad_norm": 0.19339667722600956, |
| "learning_rate": 4.811003591279834e-07, |
| "loss": 0.1585, |
| "step": 13820 |
| }, |
| { |
| "epoch": 4.369323118237106, |
| "grad_norm": 0.1938566607621067, |
| "learning_rate": 4.7639083420451425e-07, |
| "loss": 0.1593, |
| "step": 13830 |
| }, |
| { |
| "epoch": 4.37248242634863, |
| "grad_norm": 0.19830744178047272, |
| "learning_rate": 4.71703320428431e-07, |
| "loss": 0.1591, |
| "step": 13840 |
| }, |
| { |
| "epoch": 4.375641734460153, |
| "grad_norm": 0.20447285160479836, |
| "learning_rate": 4.6703784060841194e-07, |
| "loss": 0.1592, |
| "step": 13850 |
| }, |
| { |
| "epoch": 4.378801042571677, |
| "grad_norm": 0.19999302428118468, |
| "learning_rate": 4.623944174459238e-07, |
| "loss": 0.1596, |
| "step": 13860 |
| }, |
| { |
| "epoch": 4.3819603506832, |
| "grad_norm": 0.19431839969423845, |
| "learning_rate": 4.5777307353511103e-07, |
| "loss": 0.1587, |
| "step": 13870 |
| }, |
| { |
| "epoch": 4.385119658794724, |
| "grad_norm": 0.19213231385230065, |
| "learning_rate": 4.53173831362681e-07, |
| "loss": 0.1598, |
| "step": 13880 |
| }, |
| { |
| "epoch": 4.388278966906247, |
| "grad_norm": 0.19727512610076384, |
| "learning_rate": 4.485967133078001e-07, |
| "loss": 0.1595, |
| "step": 13890 |
| }, |
| { |
| "epoch": 4.391438275017771, |
| "grad_norm": 0.20137928321566095, |
| "learning_rate": 4.440417416419812e-07, |
| "loss": 0.1608, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.394597583129294, |
| "grad_norm": 0.19192751875350025, |
| "learning_rate": 4.395089385289747e-07, |
| "loss": 0.1582, |
| "step": 13910 |
| }, |
| { |
| "epoch": 4.397756891240818, |
| "grad_norm": 0.19508844609767204, |
| "learning_rate": 4.3499832602466764e-07, |
| "loss": 0.1612, |
| "step": 13920 |
| }, |
| { |
| "epoch": 4.400916199352342, |
| "grad_norm": 0.20085050538502322, |
| "learning_rate": 4.3050992607696354e-07, |
| "loss": 0.1585, |
| "step": 13930 |
| }, |
| { |
| "epoch": 4.404075507463865, |
| "grad_norm": 0.19345802602283768, |
| "learning_rate": 4.260437605256912e-07, |
| "loss": 0.1593, |
| "step": 13940 |
| }, |
| { |
| "epoch": 4.407234815575389, |
| "grad_norm": 0.19853948221248452, |
| "learning_rate": 4.215998511024844e-07, |
| "loss": 0.1593, |
| "step": 13950 |
| }, |
| { |
| "epoch": 4.410394123686912, |
| "grad_norm": 0.19645554805603088, |
| "learning_rate": 4.171782194306856e-07, |
| "loss": 0.1581, |
| "step": 13960 |
| }, |
| { |
| "epoch": 4.413553431798436, |
| "grad_norm": 0.18983249029369767, |
| "learning_rate": 4.127788870252358e-07, |
| "loss": 0.1592, |
| "step": 13970 |
| }, |
| { |
| "epoch": 4.4167127399099595, |
| "grad_norm": 0.19264647219798062, |
| "learning_rate": 4.084018752925728e-07, |
| "loss": 0.162, |
| "step": 13980 |
| }, |
| { |
| "epoch": 4.4198720480214835, |
| "grad_norm": 0.19188102638619134, |
| "learning_rate": 4.0404720553052225e-07, |
| "loss": 0.1599, |
| "step": 13990 |
| }, |
| { |
| "epoch": 4.423031356133007, |
| "grad_norm": 0.19594341326526055, |
| "learning_rate": 3.997148989282035e-07, |
| "loss": 0.1582, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.426190664244531, |
| "grad_norm": 0.19679190764934815, |
| "learning_rate": 3.9540497656591235e-07, |
| "loss": 0.16, |
| "step": 14010 |
| }, |
| { |
| "epoch": 4.429349972356054, |
| "grad_norm": 0.2001818569734459, |
| "learning_rate": 3.911174594150352e-07, |
| "loss": 0.161, |
| "step": 14020 |
| }, |
| { |
| "epoch": 4.432509280467578, |
| "grad_norm": 0.19932050541026844, |
| "learning_rate": 3.868523683379316e-07, |
| "loss": 0.1609, |
| "step": 14030 |
| }, |
| { |
| "epoch": 4.435668588579102, |
| "grad_norm": 0.19545767797055838, |
| "learning_rate": 3.8260972408784236e-07, |
| "loss": 0.1586, |
| "step": 14040 |
| }, |
| { |
| "epoch": 4.438827896690625, |
| "grad_norm": 0.19560441338063028, |
| "learning_rate": 3.7838954730878505e-07, |
| "loss": 0.1597, |
| "step": 14050 |
| }, |
| { |
| "epoch": 4.441987204802149, |
| "grad_norm": 0.19520076627630972, |
| "learning_rate": 3.741918585354548e-07, |
| "loss": 0.1601, |
| "step": 14060 |
| }, |
| { |
| "epoch": 4.445146512913672, |
| "grad_norm": 0.2021625860562286, |
| "learning_rate": 3.7001667819312303e-07, |
| "loss": 0.1589, |
| "step": 14070 |
| }, |
| { |
| "epoch": 4.448305821025196, |
| "grad_norm": 0.19657547564424072, |
| "learning_rate": 3.6586402659753994e-07, |
| "loss": 0.1593, |
| "step": 14080 |
| }, |
| { |
| "epoch": 4.451465129136719, |
| "grad_norm": 0.2371545042414208, |
| "learning_rate": 3.617339239548312e-07, |
| "loss": 0.1602, |
| "step": 14090 |
| }, |
| { |
| "epoch": 4.454624437248243, |
| "grad_norm": 0.19885312184722165, |
| "learning_rate": 3.5762639036140856e-07, |
| "loss": 0.1595, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.457783745359766, |
| "grad_norm": 0.20782243257336264, |
| "learning_rate": 3.5354144580385997e-07, |
| "loss": 0.1602, |
| "step": 14110 |
| }, |
| { |
| "epoch": 4.46094305347129, |
| "grad_norm": 0.18932292467991474, |
| "learning_rate": 3.494791101588657e-07, |
| "loss": 0.1616, |
| "step": 14120 |
| }, |
| { |
| "epoch": 4.464102361582813, |
| "grad_norm": 0.1919157424283962, |
| "learning_rate": 3.454394031930885e-07, |
| "loss": 0.1593, |
| "step": 14130 |
| }, |
| { |
| "epoch": 4.467261669694337, |
| "grad_norm": 0.2010076659233086, |
| "learning_rate": 3.414223445630865e-07, |
| "loss": 0.1599, |
| "step": 14140 |
| }, |
| { |
| "epoch": 4.47042097780586, |
| "grad_norm": 0.20255576550229257, |
| "learning_rate": 3.3742795381521533e-07, |
| "loss": 0.1593, |
| "step": 14150 |
| }, |
| { |
| "epoch": 4.473580285917384, |
| "grad_norm": 0.194621411799477, |
| "learning_rate": 3.334562503855321e-07, |
| "loss": 0.1597, |
| "step": 14160 |
| }, |
| { |
| "epoch": 4.476739594028908, |
| "grad_norm": 0.18866219829191938, |
| "learning_rate": 3.295072535996974e-07, |
| "loss": 0.1581, |
| "step": 14170 |
| }, |
| { |
| "epoch": 4.479898902140431, |
| "grad_norm": 0.19349313221379064, |
| "learning_rate": 3.255809826728923e-07, |
| "loss": 0.1601, |
| "step": 14180 |
| }, |
| { |
| "epoch": 4.483058210251955, |
| "grad_norm": 0.20115409541502788, |
| "learning_rate": 3.2167745670970973e-07, |
| "loss": 0.1601, |
| "step": 14190 |
| }, |
| { |
| "epoch": 4.486217518363478, |
| "grad_norm": 0.18780373745693515, |
| "learning_rate": 3.1779669470407615e-07, |
| "loss": 0.1589, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.489376826475002, |
| "grad_norm": 0.19486811896601117, |
| "learning_rate": 3.1393871553914654e-07, |
| "loss": 0.1587, |
| "step": 14210 |
| }, |
| { |
| "epoch": 4.492536134586525, |
| "grad_norm": 0.2025724767403799, |
| "learning_rate": 3.101035379872219e-07, |
| "loss": 0.1595, |
| "step": 14220 |
| }, |
| { |
| "epoch": 4.495695442698049, |
| "grad_norm": 0.2080673328738384, |
| "learning_rate": 3.06291180709653e-07, |
| "loss": 0.1593, |
| "step": 14230 |
| }, |
| { |
| "epoch": 4.498854750809572, |
| "grad_norm": 0.19149699902846998, |
| "learning_rate": 3.0250166225675115e-07, |
| "loss": 0.1599, |
| "step": 14240 |
| }, |
| { |
| "epoch": 4.502014058921096, |
| "grad_norm": 0.20411585929483625, |
| "learning_rate": 2.987350010676976e-07, |
| "loss": 0.1602, |
| "step": 14250 |
| }, |
| { |
| "epoch": 4.5051733670326195, |
| "grad_norm": 0.19343496961687232, |
| "learning_rate": 2.9499121547045426e-07, |
| "loss": 0.1599, |
| "step": 14260 |
| }, |
| { |
| "epoch": 4.5083326751441435, |
| "grad_norm": 0.19449547358661812, |
| "learning_rate": 2.912703236816722e-07, |
| "loss": 0.1606, |
| "step": 14270 |
| }, |
| { |
| "epoch": 4.511491983255667, |
| "grad_norm": 0.1973387536772748, |
| "learning_rate": 2.8757234380660857e-07, |
| "loss": 0.1599, |
| "step": 14280 |
| }, |
| { |
| "epoch": 4.514651291367191, |
| "grad_norm": 0.18818779692322143, |
| "learning_rate": 2.838972938390311e-07, |
| "loss": 0.1601, |
| "step": 14290 |
| }, |
| { |
| "epoch": 4.5178105994787146, |
| "grad_norm": 0.19985204703053389, |
| "learning_rate": 2.802451916611365e-07, |
| "loss": 0.1583, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.520969907590238, |
| "grad_norm": 0.19536890400548887, |
| "learning_rate": 2.7661605504346045e-07, |
| "loss": 0.1608, |
| "step": 14310 |
| }, |
| { |
| "epoch": 4.524129215701762, |
| "grad_norm": 0.18811223542835254, |
| "learning_rate": 2.730099016447929e-07, |
| "loss": 0.1596, |
| "step": 14320 |
| }, |
| { |
| "epoch": 4.527288523813285, |
| "grad_norm": 0.19468411001885044, |
| "learning_rate": 2.6942674901209e-07, |
| "loss": 0.16, |
| "step": 14330 |
| }, |
| { |
| "epoch": 4.530447831924809, |
| "grad_norm": 0.20552266635420624, |
| "learning_rate": 2.658666145803912e-07, |
| "loss": 0.1615, |
| "step": 14340 |
| }, |
| { |
| "epoch": 4.533607140036332, |
| "grad_norm": 0.1937881234664753, |
| "learning_rate": 2.623295156727301e-07, |
| "loss": 0.1578, |
| "step": 14350 |
| }, |
| { |
| "epoch": 4.536766448147856, |
| "grad_norm": 0.20412087334287093, |
| "learning_rate": 2.588154695000589e-07, |
| "loss": 0.1612, |
| "step": 14360 |
| }, |
| { |
| "epoch": 4.539925756259379, |
| "grad_norm": 0.19434151561756924, |
| "learning_rate": 2.55324493161152e-07, |
| "loss": 0.1584, |
| "step": 14370 |
| }, |
| { |
| "epoch": 4.543085064370903, |
| "grad_norm": 0.1908270600724035, |
| "learning_rate": 2.5185660364253515e-07, |
| "loss": 0.1593, |
| "step": 14380 |
| }, |
| { |
| "epoch": 4.546244372482426, |
| "grad_norm": 0.2004261876800901, |
| "learning_rate": 2.484118178183953e-07, |
| "loss": 0.1581, |
| "step": 14390 |
| }, |
| { |
| "epoch": 4.54940368059395, |
| "grad_norm": 0.19755205920634125, |
| "learning_rate": 2.4499015245049997e-07, |
| "loss": 0.1601, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.552562988705473, |
| "grad_norm": 0.198156216790078, |
| "learning_rate": 2.415916241881172e-07, |
| "loss": 0.1606, |
| "step": 14410 |
| }, |
| { |
| "epoch": 4.555722296816997, |
| "grad_norm": 0.195998760066744, |
| "learning_rate": 2.382162495679341e-07, |
| "loss": 0.1601, |
| "step": 14420 |
| }, |
| { |
| "epoch": 4.558881604928521, |
| "grad_norm": 0.18945574448552882, |
| "learning_rate": 2.3486404501397497e-07, |
| "loss": 0.158, |
| "step": 14430 |
| }, |
| { |
| "epoch": 4.562040913040044, |
| "grad_norm": 0.19748534603778248, |
| "learning_rate": 2.315350268375227e-07, |
| "loss": 0.1574, |
| "step": 14440 |
| }, |
| { |
| "epoch": 4.565200221151568, |
| "grad_norm": 0.20176443804299427, |
| "learning_rate": 2.2822921123703822e-07, |
| "loss": 0.1603, |
| "step": 14450 |
| }, |
| { |
| "epoch": 4.568359529263091, |
| "grad_norm": 0.19789674088621473, |
| "learning_rate": 2.249466142980844e-07, |
| "loss": 0.1598, |
| "step": 14460 |
| }, |
| { |
| "epoch": 4.571518837374615, |
| "grad_norm": 0.187818302106187, |
| "learning_rate": 2.2168725199324336e-07, |
| "loss": 0.159, |
| "step": 14470 |
| }, |
| { |
| "epoch": 4.574678145486138, |
| "grad_norm": 0.20046350486590453, |
| "learning_rate": 2.1845114018204382e-07, |
| "loss": 0.16, |
| "step": 14480 |
| }, |
| { |
| "epoch": 4.577837453597662, |
| "grad_norm": 0.1956285243391142, |
| "learning_rate": 2.1523829461087997e-07, |
| "loss": 0.1606, |
| "step": 14490 |
| }, |
| { |
| "epoch": 4.580996761709185, |
| "grad_norm": 0.19955861141039574, |
| "learning_rate": 2.12048730912936e-07, |
| "loss": 0.1599, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.584156069820709, |
| "grad_norm": 0.19999137325948985, |
| "learning_rate": 2.0888246460811168e-07, |
| "loss": 0.1581, |
| "step": 14510 |
| }, |
| { |
| "epoch": 4.587315377932233, |
| "grad_norm": 0.19609764005409422, |
| "learning_rate": 2.057395111029431e-07, |
| "loss": 0.1587, |
| "step": 14520 |
| }, |
| { |
| "epoch": 4.590474686043756, |
| "grad_norm": 0.19243471223634231, |
| "learning_rate": 2.0261988569053205e-07, |
| "loss": 0.1585, |
| "step": 14530 |
| }, |
| { |
| "epoch": 4.5936339941552795, |
| "grad_norm": 0.20046273900446646, |
| "learning_rate": 1.995236035504694e-07, |
| "loss": 0.1602, |
| "step": 14540 |
| }, |
| { |
| "epoch": 4.5967933022668035, |
| "grad_norm": 0.19670271834661107, |
| "learning_rate": 1.9645067974876086e-07, |
| "loss": 0.1593, |
| "step": 14550 |
| }, |
| { |
| "epoch": 4.5999526103783275, |
| "grad_norm": 0.19348803453560268, |
| "learning_rate": 1.9340112923775467e-07, |
| "loss": 0.1572, |
| "step": 14560 |
| }, |
| { |
| "epoch": 4.603111918489851, |
| "grad_norm": 0.19604507063856969, |
| "learning_rate": 1.9037496685606782e-07, |
| "loss": 0.1615, |
| "step": 14570 |
| }, |
| { |
| "epoch": 4.606271226601375, |
| "grad_norm": 0.19905957494150894, |
| "learning_rate": 1.873722073285156e-07, |
| "loss": 0.1599, |
| "step": 14580 |
| }, |
| { |
| "epoch": 4.609430534712898, |
| "grad_norm": 0.1931564026479933, |
| "learning_rate": 1.8439286526603816e-07, |
| "loss": 0.1605, |
| "step": 14590 |
| }, |
| { |
| "epoch": 4.612589842824422, |
| "grad_norm": 0.1957472742685542, |
| "learning_rate": 1.814369551656281e-07, |
| "loss": 0.1576, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.615749150935945, |
| "grad_norm": 0.19222186879935804, |
| "learning_rate": 1.7850449141026626e-07, |
| "loss": 0.158, |
| "step": 14610 |
| }, |
| { |
| "epoch": 4.618908459047469, |
| "grad_norm": 0.19288882737133697, |
| "learning_rate": 1.755954882688432e-07, |
| "loss": 0.1599, |
| "step": 14620 |
| }, |
| { |
| "epoch": 4.622067767158992, |
| "grad_norm": 0.19173903599470532, |
| "learning_rate": 1.7270995989609685e-07, |
| "loss": 0.163, |
| "step": 14630 |
| }, |
| { |
| "epoch": 4.625227075270516, |
| "grad_norm": 0.1971599904397844, |
| "learning_rate": 1.6984792033253873e-07, |
| "loss": 0.1624, |
| "step": 14640 |
| }, |
| { |
| "epoch": 4.62838638338204, |
| "grad_norm": 0.20237639199908425, |
| "learning_rate": 1.67009383504389e-07, |
| "loss": 0.1599, |
| "step": 14650 |
| }, |
| { |
| "epoch": 4.631545691493563, |
| "grad_norm": 0.19640473192083982, |
| "learning_rate": 1.6419436322350602e-07, |
| "loss": 0.1582, |
| "step": 14660 |
| }, |
| { |
| "epoch": 4.634704999605087, |
| "grad_norm": 0.19394680624920346, |
| "learning_rate": 1.6140287318732295e-07, |
| "loss": 0.1612, |
| "step": 14670 |
| }, |
| { |
| "epoch": 4.63786430771661, |
| "grad_norm": 0.19411893620985973, |
| "learning_rate": 1.5863492697877403e-07, |
| "loss": 0.1579, |
| "step": 14680 |
| }, |
| { |
| "epoch": 4.641023615828134, |
| "grad_norm": 0.19802298148744443, |
| "learning_rate": 1.5589053806623845e-07, |
| "loss": 0.1599, |
| "step": 14690 |
| }, |
| { |
| "epoch": 4.644182923939657, |
| "grad_norm": 0.19535413598264334, |
| "learning_rate": 1.5316971980346597e-07, |
| "loss": 0.1563, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.647342232051181, |
| "grad_norm": 0.19497113149503717, |
| "learning_rate": 1.5047248542951586e-07, |
| "loss": 0.1593, |
| "step": 14710 |
| }, |
| { |
| "epoch": 4.650501540162704, |
| "grad_norm": 0.2020301060995925, |
| "learning_rate": 1.4779884806869262e-07, |
| "loss": 0.1579, |
| "step": 14720 |
| }, |
| { |
| "epoch": 4.653660848274228, |
| "grad_norm": 0.19311882101118522, |
| "learning_rate": 1.4514882073048186e-07, |
| "loss": 0.1603, |
| "step": 14730 |
| }, |
| { |
| "epoch": 4.656820156385751, |
| "grad_norm": 0.19069864706937145, |
| "learning_rate": 1.4252241630948515e-07, |
| "loss": 0.159, |
| "step": 14740 |
| }, |
| { |
| "epoch": 4.659979464497275, |
| "grad_norm": 0.19826154207098273, |
| "learning_rate": 1.3991964758536148e-07, |
| "loss": 0.1594, |
| "step": 14750 |
| }, |
| { |
| "epoch": 4.663138772608798, |
| "grad_norm": 0.20463683774068114, |
| "learning_rate": 1.3734052722275849e-07, |
| "loss": 0.1607, |
| "step": 14760 |
| }, |
| { |
| "epoch": 4.666298080720322, |
| "grad_norm": 0.19219549706945396, |
| "learning_rate": 1.3478506777125865e-07, |
| "loss": 0.1574, |
| "step": 14770 |
| }, |
| { |
| "epoch": 4.669457388831846, |
| "grad_norm": 0.19753785214231598, |
| "learning_rate": 1.3225328166531158e-07, |
| "loss": 0.1599, |
| "step": 14780 |
| }, |
| { |
| "epoch": 4.672616696943369, |
| "grad_norm": 0.1970861929609217, |
| "learning_rate": 1.297451812241779e-07, |
| "loss": 0.1607, |
| "step": 14790 |
| }, |
| { |
| "epoch": 4.675776005054893, |
| "grad_norm": 0.1966183263052465, |
| "learning_rate": 1.2726077865186648e-07, |
| "loss": 0.159, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.678935313166416, |
| "grad_norm": 0.1943429806373708, |
| "learning_rate": 1.2480008603707627e-07, |
| "loss": 0.158, |
| "step": 14810 |
| }, |
| { |
| "epoch": 4.68209462127794, |
| "grad_norm": 0.19589426287899958, |
| "learning_rate": 1.223631153531385e-07, |
| "loss": 0.1577, |
| "step": 14820 |
| }, |
| { |
| "epoch": 4.6852539293894635, |
| "grad_norm": 0.19706763059063206, |
| "learning_rate": 1.1994987845795725e-07, |
| "loss": 0.1597, |
| "step": 14830 |
| }, |
| { |
| "epoch": 4.6884132375009875, |
| "grad_norm": 0.1975866024609479, |
| "learning_rate": 1.1756038709394902e-07, |
| "loss": 0.1593, |
| "step": 14840 |
| }, |
| { |
| "epoch": 4.691572545612511, |
| "grad_norm": 0.19686807427075748, |
| "learning_rate": 1.1519465288799325e-07, |
| "loss": 0.1599, |
| "step": 14850 |
| }, |
| { |
| "epoch": 4.694731853724035, |
| "grad_norm": 0.19405867333452428, |
| "learning_rate": 1.1285268735136634e-07, |
| "loss": 0.1599, |
| "step": 14860 |
| }, |
| { |
| "epoch": 4.697891161835558, |
| "grad_norm": 0.19332590972909364, |
| "learning_rate": 1.1053450187969383e-07, |
| "loss": 0.159, |
| "step": 14870 |
| }, |
| { |
| "epoch": 4.701050469947082, |
| "grad_norm": 0.19949006237363448, |
| "learning_rate": 1.0824010775288829e-07, |
| "loss": 0.1593, |
| "step": 14880 |
| }, |
| { |
| "epoch": 4.704209778058605, |
| "grad_norm": 0.19416481710856007, |
| "learning_rate": 1.0596951613509931e-07, |
| "loss": 0.1592, |
| "step": 14890 |
| }, |
| { |
| "epoch": 4.707369086170129, |
| "grad_norm": 0.19602339515974787, |
| "learning_rate": 1.0372273807465638e-07, |
| "loss": 0.1591, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.710528394281653, |
| "grad_norm": 0.1960689688312678, |
| "learning_rate": 1.0149978450401776e-07, |
| "loss": 0.1603, |
| "step": 14910 |
| }, |
| { |
| "epoch": 4.713687702393176, |
| "grad_norm": 0.2057850892961581, |
| "learning_rate": 9.930066623971334e-08, |
| "loss": 0.1591, |
| "step": 14920 |
| }, |
| { |
| "epoch": 4.7168470105047, |
| "grad_norm": 0.19301696301593, |
| "learning_rate": 9.712539398229637e-08, |
| "loss": 0.1602, |
| "step": 14930 |
| }, |
| { |
| "epoch": 4.720006318616223, |
| "grad_norm": 0.19265047597399218, |
| "learning_rate": 9.497397831628673e-08, |
| "loss": 0.1594, |
| "step": 14940 |
| }, |
| { |
| "epoch": 4.723165626727747, |
| "grad_norm": 0.19291383326551242, |
| "learning_rate": 9.284642971012559e-08, |
| "loss": 0.1556, |
| "step": 14950 |
| }, |
| { |
| "epoch": 4.72632493483927, |
| "grad_norm": 0.1985670750054423, |
| "learning_rate": 9.074275851611691e-08, |
| "loss": 0.1611, |
| "step": 14960 |
| }, |
| { |
| "epoch": 4.729484242950794, |
| "grad_norm": 0.1956461829616837, |
| "learning_rate": 8.866297497038435e-08, |
| "loss": 0.1595, |
| "step": 14970 |
| }, |
| { |
| "epoch": 4.732643551062317, |
| "grad_norm": 0.1913906186621511, |
| "learning_rate": 8.660708919281613e-08, |
| "loss": 0.1596, |
| "step": 14980 |
| }, |
| { |
| "epoch": 4.735802859173841, |
| "grad_norm": 0.20269780565703013, |
| "learning_rate": 8.457511118701911e-08, |
| "loss": 0.1585, |
| "step": 14990 |
| }, |
| { |
| "epoch": 4.738962167285364, |
| "grad_norm": 0.2006526774187281, |
| "learning_rate": 8.256705084026761e-08, |
| "loss": 0.159, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.742121475396888, |
| "grad_norm": 0.19809011682211267, |
| "learning_rate": 8.05829179234574e-08, |
| "loss": 0.1585, |
| "step": 15010 |
| }, |
| { |
| "epoch": 4.745280783508411, |
| "grad_norm": 0.1925300643932596, |
| "learning_rate": 7.862272209105625e-08, |
| "loss": 0.1593, |
| "step": 15020 |
| }, |
| { |
| "epoch": 4.748440091619935, |
| "grad_norm": 0.1928029167127188, |
| "learning_rate": 7.668647288106012e-08, |
| "loss": 0.1599, |
| "step": 15030 |
| }, |
| { |
| "epoch": 4.751599399731459, |
| "grad_norm": 0.19481339189265057, |
| "learning_rate": 7.47741797149415e-08, |
| "loss": 0.1601, |
| "step": 15040 |
| }, |
| { |
| "epoch": 4.754758707842982, |
| "grad_norm": 0.20271075417943374, |
| "learning_rate": 7.288585189760944e-08, |
| "loss": 0.1617, |
| "step": 15050 |
| }, |
| { |
| "epoch": 4.757918015954506, |
| "grad_norm": 0.19802943106559054, |
| "learning_rate": 7.102149861735962e-08, |
| "loss": 0.1585, |
| "step": 15060 |
| }, |
| { |
| "epoch": 4.761077324066029, |
| "grad_norm": 0.19714245747298037, |
| "learning_rate": 6.918112894583328e-08, |
| "loss": 0.1618, |
| "step": 15070 |
| }, |
| { |
| "epoch": 4.764236632177553, |
| "grad_norm": 0.20555489147236597, |
| "learning_rate": 6.736475183796887e-08, |
| "loss": 0.1598, |
| "step": 15080 |
| }, |
| { |
| "epoch": 4.767395940289076, |
| "grad_norm": 0.19422658967567297, |
| "learning_rate": 6.557237613196321e-08, |
| "loss": 0.1607, |
| "step": 15090 |
| }, |
| { |
| "epoch": 4.7705552484006, |
| "grad_norm": 0.19909191153347178, |
| "learning_rate": 6.380401054922547e-08, |
| "loss": 0.1594, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.7737145565121235, |
| "grad_norm": 0.18853836885626313, |
| "learning_rate": 6.205966369433547e-08, |
| "loss": 0.1607, |
| "step": 15110 |
| }, |
| { |
| "epoch": 4.7768738646236475, |
| "grad_norm": 0.2001345804485893, |
| "learning_rate": 6.033934405500042e-08, |
| "loss": 0.1618, |
| "step": 15120 |
| }, |
| { |
| "epoch": 4.7800331727351715, |
| "grad_norm": 0.20108034656670212, |
| "learning_rate": 5.864306000201825e-08, |
| "loss": 0.1625, |
| "step": 15130 |
| }, |
| { |
| "epoch": 4.783192480846695, |
| "grad_norm": 0.1957272210969326, |
| "learning_rate": 5.697081978922936e-08, |
| "loss": 0.16, |
| "step": 15140 |
| }, |
| { |
| "epoch": 4.786351788958218, |
| "grad_norm": 0.20101643906993924, |
| "learning_rate": 5.5322631553484385e-08, |
| "loss": 0.1587, |
| "step": 15150 |
| }, |
| { |
| "epoch": 4.789511097069742, |
| "grad_norm": 0.1932795778151164, |
| "learning_rate": 5.369850331459925e-08, |
| "loss": 0.1609, |
| "step": 15160 |
| }, |
| { |
| "epoch": 4.792670405181266, |
| "grad_norm": 0.2009306860005601, |
| "learning_rate": 5.209844297531796e-08, |
| "loss": 0.159, |
| "step": 15170 |
| }, |
| { |
| "epoch": 4.795829713292789, |
| "grad_norm": 0.18836162035628687, |
| "learning_rate": 5.052245832127434e-08, |
| "loss": 0.1596, |
| "step": 15180 |
| }, |
| { |
| "epoch": 4.798989021404313, |
| "grad_norm": 0.19233309651869823, |
| "learning_rate": 4.8970557020954215e-08, |
| "loss": 0.1614, |
| "step": 15190 |
| }, |
| { |
| "epoch": 4.802148329515836, |
| "grad_norm": 0.20067963469184333, |
| "learning_rate": 4.744274662565662e-08, |
| "loss": 0.16, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.80530763762736, |
| "grad_norm": 0.1907746195534059, |
| "learning_rate": 4.5939034569458804e-08, |
| "loss": 0.1595, |
| "step": 15210 |
| }, |
| { |
| "epoch": 4.808466945738883, |
| "grad_norm": 0.18618207146361934, |
| "learning_rate": 4.4459428169179583e-08, |
| "loss": 0.1596, |
| "step": 15220 |
| }, |
| { |
| "epoch": 4.811626253850407, |
| "grad_norm": 0.1924146366492045, |
| "learning_rate": 4.3003934624342716e-08, |
| "loss": 0.1581, |
| "step": 15230 |
| }, |
| { |
| "epoch": 4.81478556196193, |
| "grad_norm": 0.19971943762565272, |
| "learning_rate": 4.157256101714413e-08, |
| "loss": 0.1577, |
| "step": 15240 |
| }, |
| { |
| "epoch": 4.817944870073454, |
| "grad_norm": 0.19654001189058995, |
| "learning_rate": 4.016531431241533e-08, |
| "loss": 0.1588, |
| "step": 15250 |
| }, |
| { |
| "epoch": 4.821104178184978, |
| "grad_norm": 0.20256447137055644, |
| "learning_rate": 3.8782201357589475e-08, |
| "loss": 0.1592, |
| "step": 15260 |
| }, |
| { |
| "epoch": 4.824263486296501, |
| "grad_norm": 0.19955523886494098, |
| "learning_rate": 3.742322888267036e-08, |
| "loss": 0.159, |
| "step": 15270 |
| }, |
| { |
| "epoch": 4.827422794408025, |
| "grad_norm": 0.20426304048843397, |
| "learning_rate": 3.6088403500196267e-08, |
| "loss": 0.1585, |
| "step": 15280 |
| }, |
| { |
| "epoch": 4.830582102519548, |
| "grad_norm": 0.19123805822649273, |
| "learning_rate": 3.4777731705211705e-08, |
| "loss": 0.1599, |
| "step": 15290 |
| }, |
| { |
| "epoch": 4.833741410631072, |
| "grad_norm": 0.1961561945761796, |
| "learning_rate": 3.349121987523241e-08, |
| "loss": 0.1603, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.836900718742595, |
| "grad_norm": 0.19703665699422515, |
| "learning_rate": 3.222887427021537e-08, |
| "loss": 0.1584, |
| "step": 15310 |
| }, |
| { |
| "epoch": 4.840060026854119, |
| "grad_norm": 0.205577530927033, |
| "learning_rate": 3.099070103253055e-08, |
| "loss": 0.1599, |
| "step": 15320 |
| }, |
| { |
| "epoch": 4.843219334965642, |
| "grad_norm": 0.19037949799154982, |
| "learning_rate": 2.977670618692641e-08, |
| "loss": 0.1588, |
| "step": 15330 |
| }, |
| { |
| "epoch": 4.846378643077166, |
| "grad_norm": 0.18634230770244323, |
| "learning_rate": 2.8586895640504986e-08, |
| "loss": 0.1589, |
| "step": 15340 |
| }, |
| { |
| "epoch": 4.849537951188689, |
| "grad_norm": 0.19361637810104884, |
| "learning_rate": 2.7421275182691887e-08, |
| "loss": 0.1576, |
| "step": 15350 |
| }, |
| { |
| "epoch": 4.852697259300213, |
| "grad_norm": 0.1957214714937506, |
| "learning_rate": 2.6279850485206316e-08, |
| "loss": 0.162, |
| "step": 15360 |
| }, |
| { |
| "epoch": 4.855856567411736, |
| "grad_norm": 0.19434731826900606, |
| "learning_rate": 2.5162627102035543e-08, |
| "loss": 0.1607, |
| "step": 15370 |
| }, |
| { |
| "epoch": 4.85901587552326, |
| "grad_norm": 0.19311901028457665, |
| "learning_rate": 2.406961046940659e-08, |
| "loss": 0.1597, |
| "step": 15380 |
| }, |
| { |
| "epoch": 4.862175183634784, |
| "grad_norm": 0.19029235111786966, |
| "learning_rate": 2.3000805905761814e-08, |
| "loss": 0.1571, |
| "step": 15390 |
| }, |
| { |
| "epoch": 4.8653344917463075, |
| "grad_norm": 0.18986353610183304, |
| "learning_rate": 2.1956218611730028e-08, |
| "loss": 0.1599, |
| "step": 15400 |
| }, |
| { |
| "epoch": 4.8684937998578315, |
| "grad_norm": 0.19230056674947954, |
| "learning_rate": 2.0935853670103202e-08, |
| "loss": 0.1587, |
| "step": 15410 |
| }, |
| { |
| "epoch": 4.871653107969355, |
| "grad_norm": 0.18970480828208378, |
| "learning_rate": 1.9939716045811463e-08, |
| "loss": 0.1601, |
| "step": 15420 |
| }, |
| { |
| "epoch": 4.874812416080879, |
| "grad_norm": 0.19866752371194085, |
| "learning_rate": 1.8967810585898695e-08, |
| "loss": 0.162, |
| "step": 15430 |
| }, |
| { |
| "epoch": 4.877971724192402, |
| "grad_norm": 0.19367079176529667, |
| "learning_rate": 1.8020142019499755e-08, |
| "loss": 0.159, |
| "step": 15440 |
| }, |
| { |
| "epoch": 4.881131032303926, |
| "grad_norm": 0.19230026377170165, |
| "learning_rate": 1.7096714957814953e-08, |
| "loss": 0.1581, |
| "step": 15450 |
| }, |
| { |
| "epoch": 4.884290340415449, |
| "grad_norm": 0.18516877508195054, |
| "learning_rate": 1.619753389409062e-08, |
| "loss": 0.1577, |
| "step": 15460 |
| }, |
| { |
| "epoch": 4.887449648526973, |
| "grad_norm": 0.2030225188585794, |
| "learning_rate": 1.5322603203595797e-08, |
| "loss": 0.1584, |
| "step": 15470 |
| }, |
| { |
| "epoch": 4.890608956638496, |
| "grad_norm": 0.19372717095423658, |
| "learning_rate": 1.4471927143601127e-08, |
| "loss": 0.1597, |
| "step": 15480 |
| }, |
| { |
| "epoch": 4.89376826475002, |
| "grad_norm": 0.19197742339555968, |
| "learning_rate": 1.3645509853357775e-08, |
| "loss": 0.1568, |
| "step": 15490 |
| }, |
| { |
| "epoch": 4.896927572861543, |
| "grad_norm": 0.2003946373624122, |
| "learning_rate": 1.2843355354079102e-08, |
| "loss": 0.1588, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.900086880973067, |
| "grad_norm": 0.19449497450048703, |
| "learning_rate": 1.2065467548917353e-08, |
| "loss": 0.1563, |
| "step": 15510 |
| }, |
| { |
| "epoch": 4.903246189084591, |
| "grad_norm": 0.19730672007764802, |
| "learning_rate": 1.1311850222949227e-08, |
| "loss": 0.1608, |
| "step": 15520 |
| }, |
| { |
| "epoch": 4.906405497196114, |
| "grad_norm": 0.19426023326462366, |
| "learning_rate": 1.0582507043153112e-08, |
| "loss": 0.1587, |
| "step": 15530 |
| }, |
| { |
| "epoch": 4.909564805307638, |
| "grad_norm": 0.18665531141294303, |
| "learning_rate": 9.877441558395761e-09, |
| "loss": 0.1588, |
| "step": 15540 |
| }, |
| { |
| "epoch": 4.912724113419161, |
| "grad_norm": 0.19544733053145186, |
| "learning_rate": 9.196657199410097e-09, |
| "loss": 0.1585, |
| "step": 15550 |
| }, |
| { |
| "epoch": 4.915883421530685, |
| "grad_norm": 0.1931838965682529, |
| "learning_rate": 8.54015727878299e-09, |
| "loss": 0.16, |
| "step": 15560 |
| }, |
| { |
| "epoch": 4.919042729642208, |
| "grad_norm": 0.18609748141590166, |
| "learning_rate": 7.90794499093639e-09, |
| "loss": 0.1597, |
| "step": 15570 |
| }, |
| { |
| "epoch": 4.922202037753732, |
| "grad_norm": 0.20127723441153517, |
| "learning_rate": 7.300023412111779e-09, |
| "loss": 0.1601, |
| "step": 15580 |
| }, |
| { |
| "epoch": 4.925361345865255, |
| "grad_norm": 0.19920490400128663, |
| "learning_rate": 6.716395500357964e-09, |
| "loss": 0.1577, |
| "step": 15590 |
| }, |
| { |
| "epoch": 4.928520653976779, |
| "grad_norm": 0.19375948427344167, |
| "learning_rate": 6.157064095512754e-09, |
| "loss": 0.1599, |
| "step": 15600 |
| }, |
| { |
| "epoch": 4.931679962088302, |
| "grad_norm": 0.19750901689149744, |
| "learning_rate": 5.622031919191862e-09, |
| "loss": 0.1587, |
| "step": 15610 |
| }, |
| { |
| "epoch": 4.934839270199826, |
| "grad_norm": 0.18708793754549835, |
| "learning_rate": 5.1113015747755735e-09, |
| "loss": 0.1601, |
| "step": 15620 |
| }, |
| { |
| "epoch": 4.937998578311349, |
| "grad_norm": 0.1928749265530207, |
| "learning_rate": 4.624875547394325e-09, |
| "loss": 0.1589, |
| "step": 15630 |
| }, |
| { |
| "epoch": 4.941157886422873, |
| "grad_norm": 0.20136315326321008, |
| "learning_rate": 4.16275620391815e-09, |
| "loss": 0.1597, |
| "step": 15640 |
| }, |
| { |
| "epoch": 4.944317194534397, |
| "grad_norm": 0.19196622559471357, |
| "learning_rate": 3.724945792945023e-09, |
| "loss": 0.1594, |
| "step": 15650 |
| }, |
| { |
| "epoch": 4.94747650264592, |
| "grad_norm": 0.19581601175358024, |
| "learning_rate": 3.3114464447892013e-09, |
| "loss": 0.1596, |
| "step": 15660 |
| }, |
| { |
| "epoch": 4.950635810757444, |
| "grad_norm": 0.19173585748074187, |
| "learning_rate": 2.922260171470681e-09, |
| "loss": 0.159, |
| "step": 15670 |
| }, |
| { |
| "epoch": 4.9537951188689675, |
| "grad_norm": 0.19350532117294958, |
| "learning_rate": 2.5573888667079772e-09, |
| "loss": 0.1616, |
| "step": 15680 |
| }, |
| { |
| "epoch": 4.9569544269804915, |
| "grad_norm": 0.19845655941270632, |
| "learning_rate": 2.2168343059042475e-09, |
| "loss": 0.1588, |
| "step": 15690 |
| }, |
| { |
| "epoch": 4.960113735092015, |
| "grad_norm": 0.19860632734242958, |
| "learning_rate": 1.9005981461434065e-09, |
| "loss": 0.1609, |
| "step": 15700 |
| }, |
| { |
| "epoch": 4.963273043203539, |
| "grad_norm": 0.2001952094467118, |
| "learning_rate": 1.6086819261790232e-09, |
| "loss": 0.1585, |
| "step": 15710 |
| }, |
| { |
| "epoch": 4.966432351315062, |
| "grad_norm": 0.1971085780740881, |
| "learning_rate": 1.3410870664276598e-09, |
| "loss": 0.1583, |
| "step": 15720 |
| }, |
| { |
| "epoch": 4.969591659426586, |
| "grad_norm": 0.19391119108700927, |
| "learning_rate": 1.0978148689633205e-09, |
| "loss": 0.1594, |
| "step": 15730 |
| }, |
| { |
| "epoch": 4.97275096753811, |
| "grad_norm": 0.19649511560351368, |
| "learning_rate": 8.788665175085697e-10, |
| "loss": 0.1603, |
| "step": 15740 |
| }, |
| { |
| "epoch": 4.975910275649633, |
| "grad_norm": 0.19371704255850056, |
| "learning_rate": 6.842430774300913e-10, |
| "loss": 0.1588, |
| "step": 15750 |
| }, |
| { |
| "epoch": 4.979069583761156, |
| "grad_norm": 0.20144520587590042, |
| "learning_rate": 5.139454957342471e-10, |
| "loss": 0.1586, |
| "step": 15760 |
| }, |
| { |
| "epoch": 4.98222889187268, |
| "grad_norm": 0.18820551886405948, |
| "learning_rate": 3.6797460106152707e-10, |
| "loss": 0.1585, |
| "step": 15770 |
| }, |
| { |
| "epoch": 4.985388199984204, |
| "grad_norm": 0.1989233181042242, |
| "learning_rate": 2.463311036826621e-10, |
| "loss": 0.1583, |
| "step": 15780 |
| }, |
| { |
| "epoch": 4.988547508095727, |
| "grad_norm": 0.1988452001930714, |
| "learning_rate": 1.490155954947392e-10, |
| "loss": 0.159, |
| "step": 15790 |
| }, |
| { |
| "epoch": 4.991706816207251, |
| "grad_norm": 0.19747851657350868, |
| "learning_rate": 7.602855001953569e-11, |
| "loss": 0.1593, |
| "step": 15800 |
| }, |
| { |
| "epoch": 4.994866124318774, |
| "grad_norm": 0.19954559773754327, |
| "learning_rate": 2.7370322400188665e-11, |
| "loss": 0.1579, |
| "step": 15810 |
| }, |
| { |
| "epoch": 4.998025432430298, |
| "grad_norm": 0.1940730914471369, |
| "learning_rate": 3.041149399529708e-12, |
| "loss": 0.1603, |
| "step": 15820 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 15825, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.002555364979507e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|