| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.3, |
| "eval_steps": 500, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 0.7378236055374146, |
| "learning_rate": 1.8e-06, |
| "loss": 1.0762, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 0.295481413602829, |
| "learning_rate": 3.8e-06, |
| "loss": 1.068, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 0.14812137186527252, |
| "learning_rate": 5.8e-06, |
| "loss": 1.0568, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 0.08043279498815536, |
| "learning_rate": 7.8e-06, |
| "loss": 1.0493, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 0.08855466544628143, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 1.0441, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 0.0877288430929184, |
| "learning_rate": 1.18e-05, |
| "loss": 1.0395, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 0.09314433485269547, |
| "learning_rate": 1.3800000000000002e-05, |
| "loss": 1.0285, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.10957096517086029, |
| "learning_rate": 1.58e-05, |
| "loss": 1.0216, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.09380817413330078, |
| "learning_rate": 1.78e-05, |
| "loss": 1.0177, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.12308425456285477, |
| "learning_rate": 1.9800000000000004e-05, |
| "loss": 1.0174, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.20681029558181763, |
| "learning_rate": 2.18e-05, |
| "loss": 1.0002, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.43010827898979187, |
| "learning_rate": 2.38e-05, |
| "loss": 0.9756, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.5976276993751526, |
| "learning_rate": 2.58e-05, |
| "loss": 0.9313, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.7003195881843567, |
| "learning_rate": 2.7800000000000005e-05, |
| "loss": 0.8658, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.9297671318054199, |
| "learning_rate": 2.98e-05, |
| "loss": 0.7985, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.8993100523948669, |
| "learning_rate": 3.18e-05, |
| "loss": 0.7352, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.9233132004737854, |
| "learning_rate": 3.38e-05, |
| "loss": 0.6818, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 0.8877270221710205, |
| "learning_rate": 3.58e-05, |
| "loss": 0.6224, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 0.8032844662666321, |
| "learning_rate": 3.7800000000000004e-05, |
| "loss": 0.5745, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 1.4975675344467163, |
| "learning_rate": 3.9800000000000005e-05, |
| "loss": 0.5299, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 0.8472157716751099, |
| "learning_rate": 4.18e-05, |
| "loss": 0.498, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.974686324596405, |
| "learning_rate": 4.38e-05, |
| "loss": 0.4575, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 0.9499194025993347, |
| "learning_rate": 4.58e-05, |
| "loss": 0.4184, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 0.9380892515182495, |
| "learning_rate": 4.78e-05, |
| "loss": 0.3887, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 1.1189147233963013, |
| "learning_rate": 4.9800000000000004e-05, |
| "loss": 0.356, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 1.1425296068191528, |
| "learning_rate": 5.1800000000000005e-05, |
| "loss": 0.3323, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 1.6566969156265259, |
| "learning_rate": 5.380000000000001e-05, |
| "loss": 0.3011, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 1.130071759223938, |
| "learning_rate": 5.580000000000001e-05, |
| "loss": 0.266, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 0.748692512512207, |
| "learning_rate": 5.7799999999999995e-05, |
| "loss": 0.2453, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 1.0026001930236816, |
| "learning_rate": 5.9800000000000003e-05, |
| "loss": 0.2321, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 1.0121994018554688, |
| "learning_rate": 6.18e-05, |
| "loss": 0.2158, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 1.153480052947998, |
| "learning_rate": 6.38e-05, |
| "loss": 0.1899, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 1.013115644454956, |
| "learning_rate": 6.58e-05, |
| "loss": 0.1817, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 0.9551103115081787, |
| "learning_rate": 6.780000000000001e-05, |
| "loss": 0.1799, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 1.242201566696167, |
| "learning_rate": 6.98e-05, |
| "loss": 0.1602, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 1.2399349212646484, |
| "learning_rate": 7.18e-05, |
| "loss": 0.1463, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 0.9403181076049805, |
| "learning_rate": 7.38e-05, |
| "loss": 0.1228, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 0.894763171672821, |
| "learning_rate": 7.58e-05, |
| "loss": 0.1122, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 0.9855173230171204, |
| "learning_rate": 7.780000000000001e-05, |
| "loss": 0.1044, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 1.3088891506195068, |
| "learning_rate": 7.98e-05, |
| "loss": 0.0922, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 1.064687967300415, |
| "learning_rate": 8.18e-05, |
| "loss": 0.0824, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 1.112962007522583, |
| "learning_rate": 8.38e-05, |
| "loss": 0.077, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 1.0096707344055176, |
| "learning_rate": 8.58e-05, |
| "loss": 0.0816, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 0.956439733505249, |
| "learning_rate": 8.78e-05, |
| "loss": 0.0721, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 0.841948926448822, |
| "learning_rate": 8.98e-05, |
| "loss": 0.0723, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 1.0005617141723633, |
| "learning_rate": 9.180000000000001e-05, |
| "loss": 0.0676, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 0.8577463030815125, |
| "learning_rate": 9.38e-05, |
| "loss": 0.0647, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 0.9084122776985168, |
| "learning_rate": 9.58e-05, |
| "loss": 0.0677, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 1.0833115577697754, |
| "learning_rate": 9.78e-05, |
| "loss": 0.0653, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 0.8259796500205994, |
| "learning_rate": 9.98e-05, |
| "loss": 0.0669, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 0.8890817165374756, |
| "learning_rate": 9.9999778549206e-05, |
| "loss": 0.0611, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 0.8948380351066589, |
| "learning_rate": 9.999901304280685e-05, |
| "loss": 0.0622, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 0.848558247089386, |
| "learning_rate": 9.999770075521164e-05, |
| "loss": 0.0625, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 0.7437359094619751, |
| "learning_rate": 9.99958417007713e-05, |
| "loss": 0.0626, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 0.8651896715164185, |
| "learning_rate": 9.999343589981615e-05, |
| "loss": 0.0531, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 0.8850076198577881, |
| "learning_rate": 9.999048337865568e-05, |
| "loss": 0.0615, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 1.15691077709198, |
| "learning_rate": 9.998698416957815e-05, |
| "loss": 0.0641, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 0.7086659073829651, |
| "learning_rate": 9.998293831085037e-05, |
| "loss": 0.0596, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 0.7347807288169861, |
| "learning_rate": 9.997834584671719e-05, |
| "loss": 0.0572, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 0.7725936770439148, |
| "learning_rate": 9.997320682740107e-05, |
| "loss": 0.0595, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 0.769038200378418, |
| "learning_rate": 9.996752130910149e-05, |
| "loss": 0.0549, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 0.6636187434196472, |
| "learning_rate": 9.99612893539944e-05, |
| "loss": 0.0538, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 0.7348890900611877, |
| "learning_rate": 9.995451103023144e-05, |
| "loss": 0.0527, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 0.7872657179832458, |
| "learning_rate": 9.994718641193928e-05, |
| "loss": 0.0557, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 0.6932393312454224, |
| "learning_rate": 9.993931557921874e-05, |
| "loss": 0.0548, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 0.7634221911430359, |
| "learning_rate": 9.993089861814402e-05, |
| "loss": 0.0524, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 0.7409372925758362, |
| "learning_rate": 9.992193562076166e-05, |
| "loss": 0.0496, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 0.7612417936325073, |
| "learning_rate": 9.991242668508954e-05, |
| "loss": 0.0461, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 0.7743764519691467, |
| "learning_rate": 9.990237191511587e-05, |
| "loss": 0.0435, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 0.725627601146698, |
| "learning_rate": 9.989177142079802e-05, |
| "loss": 0.0471, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 0.5516918301582336, |
| "learning_rate": 9.988062531806126e-05, |
| "loss": 0.0462, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 0.730378270149231, |
| "learning_rate": 9.986893372879762e-05, |
| "loss": 0.0487, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 0.6389397382736206, |
| "learning_rate": 9.985669678086443e-05, |
| "loss": 0.0488, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 0.8831709623336792, |
| "learning_rate": 9.984391460808298e-05, |
| "loss": 0.0514, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 0.6661105751991272, |
| "learning_rate": 9.983058735023709e-05, |
| "loss": 0.0514, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 0.7856804728507996, |
| "learning_rate": 9.98167151530715e-05, |
| "loss": 0.0471, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 0.7654492259025574, |
| "learning_rate": 9.980229816829034e-05, |
| "loss": 0.0505, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 0.6101555228233337, |
| "learning_rate": 9.978733655355544e-05, |
| "loss": 0.047, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 0.7730712890625, |
| "learning_rate": 9.977183047248464e-05, |
| "loss": 0.0424, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 0.8450173139572144, |
| "learning_rate": 9.975578009464992e-05, |
| "loss": 0.0455, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 0.5586540102958679, |
| "learning_rate": 9.97391855955757e-05, |
| "loss": 0.04, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 0.8502600789070129, |
| "learning_rate": 9.972204715673669e-05, |
| "loss": 0.0418, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 0.6090761423110962, |
| "learning_rate": 9.970436496555617e-05, |
| "loss": 0.042, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 0.6097173690795898, |
| "learning_rate": 9.968613921540373e-05, |
| "loss": 0.0451, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 0.765418291091919, |
| "learning_rate": 9.966737010559326e-05, |
| "loss": 0.0447, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 0.7200655937194824, |
| "learning_rate": 9.964805784138072e-05, |
| "loss": 0.0439, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 0.6888765692710876, |
| "learning_rate": 9.962820263396195e-05, |
| "loss": 0.0416, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 0.5708920359611511, |
| "learning_rate": 9.960780470047033e-05, |
| "loss": 0.0459, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 0.7507001757621765, |
| "learning_rate": 9.958686426397437e-05, |
| "loss": 0.0425, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 0.5076937079429626, |
| "learning_rate": 9.956538155347534e-05, |
| "loss": 0.0455, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 0.5799984335899353, |
| "learning_rate": 9.95433568039047e-05, |
| "loss": 0.0399, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 0.6337814927101135, |
| "learning_rate": 9.952079025612162e-05, |
| "loss": 0.0381, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 0.7000153660774231, |
| "learning_rate": 9.949768215691022e-05, |
| "loss": 0.0411, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 0.5318272709846497, |
| "learning_rate": 9.9474032758977e-05, |
| "loss": 0.0401, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 0.700434148311615, |
| "learning_rate": 9.944984232094794e-05, |
| "loss": 0.0435, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 0.605954647064209, |
| "learning_rate": 9.942511110736584e-05, |
| "loss": 0.0411, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 0.5715162754058838, |
| "learning_rate": 9.939983938868726e-05, |
| "loss": 0.0414, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 0.6310116648674011, |
| "learning_rate": 9.93740274412797e-05, |
| "loss": 0.0383, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 0.680823564529419, |
| "learning_rate": 9.934767554741846e-05, |
| "loss": 0.0457, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 0.632407546043396, |
| "learning_rate": 9.932078399528361e-05, |
| "loss": 0.0374, |
| "step": 1000 |
| }, |
| { |
| "grad_norm": 0.5892583727836609, |
| "learning_rate": 9.929335307895689e-05, |
| "loss": 0.0368, |
| "step": 1010 |
| }, |
| { |
| "grad_norm": 0.6278207898139954, |
| "learning_rate": 9.926538309841839e-05, |
| "loss": 0.0434, |
| "step": 1020 |
| }, |
| { |
| "grad_norm": 0.5285525321960449, |
| "learning_rate": 9.923687435954334e-05, |
| "loss": 0.0363, |
| "step": 1030 |
| }, |
| { |
| "grad_norm": 0.6097428798675537, |
| "learning_rate": 9.920782717409873e-05, |
| "loss": 0.0348, |
| "step": 1040 |
| }, |
| { |
| "grad_norm": 0.6607808470726013, |
| "learning_rate": 9.917824185973994e-05, |
| "loss": 0.0344, |
| "step": 1050 |
| }, |
| { |
| "grad_norm": 0.5603345036506653, |
| "learning_rate": 9.914811874000723e-05, |
| "loss": 0.0339, |
| "step": 1060 |
| }, |
| { |
| "grad_norm": 0.5727225542068481, |
| "learning_rate": 9.911745814432218e-05, |
| "loss": 0.0371, |
| "step": 1070 |
| }, |
| { |
| "grad_norm": 0.5944136381149292, |
| "learning_rate": 9.90862604079842e-05, |
| "loss": 0.0398, |
| "step": 1080 |
| }, |
| { |
| "grad_norm": 0.6860123872756958, |
| "learning_rate": 9.90545258721667e-05, |
| "loss": 0.0378, |
| "step": 1090 |
| }, |
| { |
| "grad_norm": 0.6331652402877808, |
| "learning_rate": 9.90222548839135e-05, |
| "loss": 0.0352, |
| "step": 1100 |
| }, |
| { |
| "grad_norm": 0.5666359066963196, |
| "learning_rate": 9.898944779613495e-05, |
| "loss": 0.034, |
| "step": 1110 |
| }, |
| { |
| "grad_norm": 0.5733403563499451, |
| "learning_rate": 9.89561049676041e-05, |
| "loss": 0.0352, |
| "step": 1120 |
| }, |
| { |
| "grad_norm": 0.5164110660552979, |
| "learning_rate": 9.89222267629528e-05, |
| "loss": 0.0379, |
| "step": 1130 |
| }, |
| { |
| "grad_norm": 0.6303825378417969, |
| "learning_rate": 9.888781355266763e-05, |
| "loss": 0.0369, |
| "step": 1140 |
| }, |
| { |
| "grad_norm": 0.5613416433334351, |
| "learning_rate": 9.885286571308598e-05, |
| "loss": 0.0338, |
| "step": 1150 |
| }, |
| { |
| "grad_norm": 0.6414242386817932, |
| "learning_rate": 9.881738362639182e-05, |
| "loss": 0.0375, |
| "step": 1160 |
| }, |
| { |
| "grad_norm": 0.5172221660614014, |
| "learning_rate": 9.878136768061154e-05, |
| "loss": 0.0376, |
| "step": 1170 |
| }, |
| { |
| "grad_norm": 0.6341432332992554, |
| "learning_rate": 9.874481826960979e-05, |
| "loss": 0.0374, |
| "step": 1180 |
| }, |
| { |
| "grad_norm": 0.624677836894989, |
| "learning_rate": 9.870773579308503e-05, |
| "loss": 0.0341, |
| "step": 1190 |
| }, |
| { |
| "grad_norm": 0.4869997203350067, |
| "learning_rate": 9.867012065656533e-05, |
| "loss": 0.0381, |
| "step": 1200 |
| }, |
| { |
| "grad_norm": 0.5759740471839905, |
| "learning_rate": 9.863197327140376e-05, |
| "loss": 0.0333, |
| "step": 1210 |
| }, |
| { |
| "grad_norm": 0.48775139451026917, |
| "learning_rate": 9.859329405477403e-05, |
| "loss": 0.0331, |
| "step": 1220 |
| }, |
| { |
| "grad_norm": 0.6388097405433655, |
| "learning_rate": 9.855408342966585e-05, |
| "loss": 0.0352, |
| "step": 1230 |
| }, |
| { |
| "grad_norm": 0.5959818363189697, |
| "learning_rate": 9.851434182488033e-05, |
| "loss": 0.0338, |
| "step": 1240 |
| }, |
| { |
| "grad_norm": 0.657508373260498, |
| "learning_rate": 9.84740696750253e-05, |
| "loss": 0.0331, |
| "step": 1250 |
| }, |
| { |
| "grad_norm": 0.7012799978256226, |
| "learning_rate": 9.843326742051055e-05, |
| "loss": 0.0348, |
| "step": 1260 |
| }, |
| { |
| "grad_norm": 0.5348427295684814, |
| "learning_rate": 9.839193550754297e-05, |
| "loss": 0.0337, |
| "step": 1270 |
| }, |
| { |
| "grad_norm": 0.7294585704803467, |
| "learning_rate": 9.835007438812177e-05, |
| "loss": 0.038, |
| "step": 1280 |
| }, |
| { |
| "grad_norm": 0.6077402830123901, |
| "learning_rate": 9.830768452003341e-05, |
| "loss": 0.0342, |
| "step": 1290 |
| }, |
| { |
| "grad_norm": 0.5021491050720215, |
| "learning_rate": 9.826476636684671e-05, |
| "loss": 0.0339, |
| "step": 1300 |
| }, |
| { |
| "grad_norm": 0.42891937494277954, |
| "learning_rate": 9.822132039790773e-05, |
| "loss": 0.0322, |
| "step": 1310 |
| }, |
| { |
| "grad_norm": 0.5746376514434814, |
| "learning_rate": 9.817734708833461e-05, |
| "loss": 0.0302, |
| "step": 1320 |
| }, |
| { |
| "grad_norm": 0.591606616973877, |
| "learning_rate": 9.813284691901243e-05, |
| "loss": 0.039, |
| "step": 1330 |
| }, |
| { |
| "grad_norm": 0.5928114056587219, |
| "learning_rate": 9.808782037658792e-05, |
| "loss": 0.0367, |
| "step": 1340 |
| }, |
| { |
| "grad_norm": 0.5678219199180603, |
| "learning_rate": 9.804226795346411e-05, |
| "loss": 0.0343, |
| "step": 1350 |
| }, |
| { |
| "grad_norm": 0.5018511414527893, |
| "learning_rate": 9.799619014779503e-05, |
| "loss": 0.0331, |
| "step": 1360 |
| }, |
| { |
| "grad_norm": 0.5295028686523438, |
| "learning_rate": 9.794958746348013e-05, |
| "loss": 0.0337, |
| "step": 1370 |
| }, |
| { |
| "grad_norm": 0.6938942074775696, |
| "learning_rate": 9.790246041015896e-05, |
| "loss": 0.0306, |
| "step": 1380 |
| }, |
| { |
| "grad_norm": 0.5297317504882812, |
| "learning_rate": 9.785480950320538e-05, |
| "loss": 0.0331, |
| "step": 1390 |
| }, |
| { |
| "grad_norm": 0.637657105922699, |
| "learning_rate": 9.78066352637221e-05, |
| "loss": 0.0311, |
| "step": 1400 |
| }, |
| { |
| "grad_norm": 0.5819315314292908, |
| "learning_rate": 9.775793821853488e-05, |
| "loss": 0.0327, |
| "step": 1410 |
| }, |
| { |
| "grad_norm": 0.7160147428512573, |
| "learning_rate": 9.77087189001868e-05, |
| "loss": 0.0323, |
| "step": 1420 |
| }, |
| { |
| "grad_norm": 0.7221500873565674, |
| "learning_rate": 9.765897784693243e-05, |
| "loss": 0.0332, |
| "step": 1430 |
| }, |
| { |
| "grad_norm": 0.5845819711685181, |
| "learning_rate": 9.760871560273197e-05, |
| "loss": 0.0312, |
| "step": 1440 |
| }, |
| { |
| "grad_norm": 0.5930690765380859, |
| "learning_rate": 9.755793271724526e-05, |
| "loss": 0.0305, |
| "step": 1450 |
| }, |
| { |
| "grad_norm": 0.4570452570915222, |
| "learning_rate": 9.750662974582584e-05, |
| "loss": 0.0372, |
| "step": 1460 |
| }, |
| { |
| "grad_norm": 0.5543919801712036, |
| "learning_rate": 9.745480724951473e-05, |
| "loss": 0.0314, |
| "step": 1470 |
| }, |
| { |
| "grad_norm": 0.5798304677009583, |
| "learning_rate": 9.740246579503447e-05, |
| "loss": 0.0336, |
| "step": 1480 |
| }, |
| { |
| "grad_norm": 0.5464045405387878, |
| "learning_rate": 9.734960595478284e-05, |
| "loss": 0.032, |
| "step": 1490 |
| }, |
| { |
| "grad_norm": 0.5292957425117493, |
| "learning_rate": 9.729622830682657e-05, |
| "loss": 0.0308, |
| "step": 1500 |
| }, |
| { |
| "grad_norm": 0.4644886255264282, |
| "learning_rate": 9.724233343489504e-05, |
| "loss": 0.0341, |
| "step": 1510 |
| }, |
| { |
| "grad_norm": 0.4468748867511749, |
| "learning_rate": 9.718792192837396e-05, |
| "loss": 0.029, |
| "step": 1520 |
| }, |
| { |
| "grad_norm": 0.5442079305648804, |
| "learning_rate": 9.713299438229886e-05, |
| "loss": 0.0337, |
| "step": 1530 |
| }, |
| { |
| "grad_norm": 0.46528083086013794, |
| "learning_rate": 9.707755139734855e-05, |
| "loss": 0.0338, |
| "step": 1540 |
| }, |
| { |
| "grad_norm": 0.5214895009994507, |
| "learning_rate": 9.702159357983866e-05, |
| "loss": 0.0315, |
| "step": 1550 |
| }, |
| { |
| "grad_norm": 0.564447820186615, |
| "learning_rate": 9.696512154171492e-05, |
| "loss": 0.0329, |
| "step": 1560 |
| }, |
| { |
| "grad_norm": 0.471500962972641, |
| "learning_rate": 9.690813590054645e-05, |
| "loss": 0.0326, |
| "step": 1570 |
| }, |
| { |
| "grad_norm": 0.52679044008255, |
| "learning_rate": 9.685063727951914e-05, |
| "loss": 0.0305, |
| "step": 1580 |
| }, |
| { |
| "grad_norm": 0.4842182993888855, |
| "learning_rate": 9.679262630742865e-05, |
| "loss": 0.0317, |
| "step": 1590 |
| }, |
| { |
| "grad_norm": 0.5807623267173767, |
| "learning_rate": 9.673410361867373e-05, |
| "loss": 0.0336, |
| "step": 1600 |
| }, |
| { |
| "grad_norm": 0.41651445627212524, |
| "learning_rate": 9.667506985324909e-05, |
| "loss": 0.031, |
| "step": 1610 |
| }, |
| { |
| "grad_norm": 0.4714881479740143, |
| "learning_rate": 9.661552565673855e-05, |
| "loss": 0.028, |
| "step": 1620 |
| }, |
| { |
| "grad_norm": 0.4803926944732666, |
| "learning_rate": 9.655547168030789e-05, |
| "loss": 0.0321, |
| "step": 1630 |
| }, |
| { |
| "grad_norm": 0.5974353551864624, |
| "learning_rate": 9.649490858069777e-05, |
| "loss": 0.0295, |
| "step": 1640 |
| }, |
| { |
| "grad_norm": 0.4964613914489746, |
| "learning_rate": 9.643383702021658e-05, |
| "loss": 0.0297, |
| "step": 1650 |
| }, |
| { |
| "grad_norm": 0.6017008423805237, |
| "learning_rate": 9.637225766673307e-05, |
| "loss": 0.0286, |
| "step": 1660 |
| }, |
| { |
| "grad_norm": 0.5116117000579834, |
| "learning_rate": 9.631017119366922e-05, |
| "loss": 0.0296, |
| "step": 1670 |
| }, |
| { |
| "grad_norm": 0.5510458946228027, |
| "learning_rate": 9.624757827999273e-05, |
| "loss": 0.0322, |
| "step": 1680 |
| }, |
| { |
| "grad_norm": 0.41116780042648315, |
| "learning_rate": 9.618447961020971e-05, |
| "loss": 0.0365, |
| "step": 1690 |
| }, |
| { |
| "grad_norm": 0.4938806891441345, |
| "learning_rate": 9.612087587435707e-05, |
| "loss": 0.0346, |
| "step": 1700 |
| }, |
| { |
| "grad_norm": 0.5194259881973267, |
| "learning_rate": 9.605676776799508e-05, |
| "loss": 0.0311, |
| "step": 1710 |
| }, |
| { |
| "grad_norm": 0.4529009163379669, |
| "learning_rate": 9.599215599219973e-05, |
| "loss": 0.0306, |
| "step": 1720 |
| }, |
| { |
| "grad_norm": 0.4386800229549408, |
| "learning_rate": 9.592704125355505e-05, |
| "loss": 0.0303, |
| "step": 1730 |
| }, |
| { |
| "grad_norm": 0.44015586376190186, |
| "learning_rate": 9.586142426414538e-05, |
| "loss": 0.0291, |
| "step": 1740 |
| }, |
| { |
| "grad_norm": 0.5530741810798645, |
| "learning_rate": 9.57953057415476e-05, |
| "loss": 0.0328, |
| "step": 1750 |
| }, |
| { |
| "grad_norm": 0.28225114941596985, |
| "learning_rate": 9.572868640882328e-05, |
| "loss": 0.0296, |
| "step": 1760 |
| }, |
| { |
| "grad_norm": 0.6074041724205017, |
| "learning_rate": 9.56615669945108e-05, |
| "loss": 0.0324, |
| "step": 1770 |
| }, |
| { |
| "grad_norm": 0.5109390616416931, |
| "learning_rate": 9.55939482326173e-05, |
| "loss": 0.03, |
| "step": 1780 |
| }, |
| { |
| "grad_norm": 0.5892201662063599, |
| "learning_rate": 9.552583086261069e-05, |
| "loss": 0.0316, |
| "step": 1790 |
| }, |
| { |
| "grad_norm": 0.4495730400085449, |
| "learning_rate": 9.545721562941168e-05, |
| "loss": 0.0295, |
| "step": 1800 |
| }, |
| { |
| "grad_norm": 0.5142664313316345, |
| "learning_rate": 9.538810328338543e-05, |
| "loss": 0.0277, |
| "step": 1810 |
| }, |
| { |
| "grad_norm": 0.4616416394710541, |
| "learning_rate": 9.531849458033349e-05, |
| "loss": 0.0307, |
| "step": 1820 |
| }, |
| { |
| "grad_norm": 0.4885185658931732, |
| "learning_rate": 9.524839028148547e-05, |
| "loss": 0.0298, |
| "step": 1830 |
| }, |
| { |
| "grad_norm": 0.4711757004261017, |
| "learning_rate": 9.517779115349077e-05, |
| "loss": 0.0304, |
| "step": 1840 |
| }, |
| { |
| "grad_norm": 0.4843687117099762, |
| "learning_rate": 9.510669796841014e-05, |
| "loss": 0.0301, |
| "step": 1850 |
| }, |
| { |
| "grad_norm": 0.5420807003974915, |
| "learning_rate": 9.503511150370727e-05, |
| "loss": 0.0326, |
| "step": 1860 |
| }, |
| { |
| "grad_norm": 0.644017219543457, |
| "learning_rate": 9.496303254224024e-05, |
| "loss": 0.0318, |
| "step": 1870 |
| }, |
| { |
| "grad_norm": 0.4648231565952301, |
| "learning_rate": 9.489046187225306e-05, |
| "loss": 0.0301, |
| "step": 1880 |
| }, |
| { |
| "grad_norm": 0.5046685338020325, |
| "learning_rate": 9.481740028736692e-05, |
| "loss": 0.0314, |
| "step": 1890 |
| }, |
| { |
| "grad_norm": 0.49768561124801636, |
| "learning_rate": 9.474384858657164e-05, |
| "loss": 0.0291, |
| "step": 1900 |
| }, |
| { |
| "grad_norm": 0.5587893724441528, |
| "learning_rate": 9.466980757421679e-05, |
| "loss": 0.0296, |
| "step": 1910 |
| }, |
| { |
| "grad_norm": 0.5340442061424255, |
| "learning_rate": 9.459527806000305e-05, |
| "loss": 0.0313, |
| "step": 1920 |
| }, |
| { |
| "grad_norm": 0.5392602682113647, |
| "learning_rate": 9.452026085897325e-05, |
| "loss": 0.0308, |
| "step": 1930 |
| }, |
| { |
| "grad_norm": 0.4618771970272064, |
| "learning_rate": 9.444475679150348e-05, |
| "loss": 0.0296, |
| "step": 1940 |
| }, |
| { |
| "grad_norm": 0.4055277705192566, |
| "learning_rate": 9.436876668329411e-05, |
| "loss": 0.028, |
| "step": 1950 |
| }, |
| { |
| "grad_norm": 0.5005772113800049, |
| "learning_rate": 9.429229136536079e-05, |
| "loss": 0.0273, |
| "step": 1960 |
| }, |
| { |
| "grad_norm": 0.42232707142829895, |
| "learning_rate": 9.421533167402534e-05, |
| "loss": 0.0286, |
| "step": 1970 |
| }, |
| { |
| "grad_norm": 0.5429880619049072, |
| "learning_rate": 9.413788845090666e-05, |
| "loss": 0.029, |
| "step": 1980 |
| }, |
| { |
| "grad_norm": 0.4448404312133789, |
| "learning_rate": 9.405996254291136e-05, |
| "loss": 0.0284, |
| "step": 1990 |
| }, |
| { |
| "grad_norm": 0.5074642300605774, |
| "learning_rate": 9.398155480222474e-05, |
| "loss": 0.0283, |
| "step": 2000 |
| }, |
| { |
| "grad_norm": 0.4470667243003845, |
| "learning_rate": 9.390266608630128e-05, |
| "loss": 0.0267, |
| "step": 2010 |
| }, |
| { |
| "grad_norm": 0.47320127487182617, |
| "learning_rate": 9.38232972578553e-05, |
| "loss": 0.0303, |
| "step": 2020 |
| }, |
| { |
| "grad_norm": 0.5718346238136292, |
| "learning_rate": 9.374344918485164e-05, |
| "loss": 0.0296, |
| "step": 2030 |
| }, |
| { |
| "grad_norm": 0.4110424518585205, |
| "learning_rate": 9.366312274049602e-05, |
| "loss": 0.028, |
| "step": 2040 |
| }, |
| { |
| "grad_norm": 0.41520369052886963, |
| "learning_rate": 9.358231880322554e-05, |
| "loss": 0.0296, |
| "step": 2050 |
| }, |
| { |
| "grad_norm": 0.4130535423755646, |
| "learning_rate": 9.350103825669916e-05, |
| "loss": 0.0286, |
| "step": 2060 |
| }, |
| { |
| "grad_norm": 0.5143803358078003, |
| "learning_rate": 9.341928198978787e-05, |
| "loss": 0.0285, |
| "step": 2070 |
| }, |
| { |
| "grad_norm": 0.5418136119842529, |
| "learning_rate": 9.333705089656512e-05, |
| "loss": 0.0264, |
| "step": 2080 |
| }, |
| { |
| "grad_norm": 0.46870583295822144, |
| "learning_rate": 9.325434587629698e-05, |
| "loss": 0.0317, |
| "step": 2090 |
| }, |
| { |
| "grad_norm": 0.417431116104126, |
| "learning_rate": 9.31711678334323e-05, |
| "loss": 0.0284, |
| "step": 2100 |
| }, |
| { |
| "grad_norm": 0.49152880907058716, |
| "learning_rate": 9.308751767759282e-05, |
| "loss": 0.025, |
| "step": 2110 |
| }, |
| { |
| "grad_norm": 0.378698468208313, |
| "learning_rate": 9.300339632356325e-05, |
| "loss": 0.027, |
| "step": 2120 |
| }, |
| { |
| "grad_norm": 0.4329814016819, |
| "learning_rate": 9.291880469128124e-05, |
| "loss": 0.0299, |
| "step": 2130 |
| }, |
| { |
| "grad_norm": 0.49008893966674805, |
| "learning_rate": 9.283374370582732e-05, |
| "loss": 0.0273, |
| "step": 2140 |
| }, |
| { |
| "grad_norm": 0.3793398141860962, |
| "learning_rate": 9.274821429741482e-05, |
| "loss": 0.0264, |
| "step": 2150 |
| }, |
| { |
| "grad_norm": 0.3981456458568573, |
| "learning_rate": 9.266221740137961e-05, |
| "loss": 0.026, |
| "step": 2160 |
| }, |
| { |
| "grad_norm": 0.5248379707336426, |
| "learning_rate": 9.257575395817001e-05, |
| "loss": 0.0255, |
| "step": 2170 |
| }, |
| { |
| "grad_norm": 0.39933085441589355, |
| "learning_rate": 9.248882491333637e-05, |
| "loss": 0.0261, |
| "step": 2180 |
| }, |
| { |
| "grad_norm": 0.5205438733100891, |
| "learning_rate": 9.240143121752076e-05, |
| "loss": 0.0253, |
| "step": 2190 |
| }, |
| { |
| "grad_norm": 0.4256397783756256, |
| "learning_rate": 9.23135738264467e-05, |
| "loss": 0.0313, |
| "step": 2200 |
| }, |
| { |
| "grad_norm": 0.4098120629787445, |
| "learning_rate": 9.222525370090849e-05, |
| "loss": 0.0286, |
| "step": 2210 |
| }, |
| { |
| "grad_norm": 0.452364444732666, |
| "learning_rate": 9.213647180676088e-05, |
| "loss": 0.0313, |
| "step": 2220 |
| }, |
| { |
| "grad_norm": 0.5283573269844055, |
| "learning_rate": 9.204722911490846e-05, |
| "loss": 0.027, |
| "step": 2230 |
| }, |
| { |
| "grad_norm": 0.4341718852519989, |
| "learning_rate": 9.1957526601295e-05, |
| "loss": 0.026, |
| "step": 2240 |
| }, |
| { |
| "grad_norm": 0.5589078664779663, |
| "learning_rate": 9.186736524689281e-05, |
| "loss": 0.0277, |
| "step": 2250 |
| }, |
| { |
| "grad_norm": 0.3732184171676636, |
| "learning_rate": 9.177674603769204e-05, |
| "loss": 0.0289, |
| "step": 2260 |
| }, |
| { |
| "grad_norm": 0.39962926506996155, |
| "learning_rate": 9.168566996468983e-05, |
| "loss": 0.0262, |
| "step": 2270 |
| }, |
| { |
| "grad_norm": 0.43201372027397156, |
| "learning_rate": 9.159413802387951e-05, |
| "loss": 0.0239, |
| "step": 2280 |
| }, |
| { |
| "grad_norm": 0.4189751446247101, |
| "learning_rate": 9.150215121623974e-05, |
| "loss": 0.0266, |
| "step": 2290 |
| }, |
| { |
| "grad_norm": 0.3986872136592865, |
| "learning_rate": 9.140971054772349e-05, |
| "loss": 0.0255, |
| "step": 2300 |
| }, |
| { |
| "grad_norm": 0.4998125731945038, |
| "learning_rate": 9.131681702924713e-05, |
| "loss": 0.0281, |
| "step": 2310 |
| }, |
| { |
| "grad_norm": 0.4827892482280731, |
| "learning_rate": 9.122347167667926e-05, |
| "loss": 0.0281, |
| "step": 2320 |
| }, |
| { |
| "grad_norm": 0.4876689016819, |
| "learning_rate": 9.112967551082973e-05, |
| "loss": 0.0319, |
| "step": 2330 |
| }, |
| { |
| "grad_norm": 0.36984163522720337, |
| "learning_rate": 9.103542955743835e-05, |
| "loss": 0.0242, |
| "step": 2340 |
| }, |
| { |
| "grad_norm": 0.465818852186203, |
| "learning_rate": 9.094073484716381e-05, |
| "loss": 0.0314, |
| "step": 2350 |
| }, |
| { |
| "grad_norm": 0.37877270579338074, |
| "learning_rate": 9.084559241557226e-05, |
| "loss": 0.0262, |
| "step": 2360 |
| }, |
| { |
| "grad_norm": 0.4463783800601959, |
| "learning_rate": 9.075000330312608e-05, |
| "loss": 0.0263, |
| "step": 2370 |
| }, |
| { |
| "grad_norm": 0.47452881932258606, |
| "learning_rate": 9.065396855517253e-05, |
| "loss": 0.0272, |
| "step": 2380 |
| }, |
| { |
| "grad_norm": 0.424927681684494, |
| "learning_rate": 9.055748922193219e-05, |
| "loss": 0.0278, |
| "step": 2390 |
| }, |
| { |
| "grad_norm": 0.3524123728275299, |
| "learning_rate": 9.046056635848761e-05, |
| "loss": 0.0268, |
| "step": 2400 |
| }, |
| { |
| "grad_norm": 0.39357349276542664, |
| "learning_rate": 9.036320102477169e-05, |
| "loss": 0.0235, |
| "step": 2410 |
| }, |
| { |
| "grad_norm": 0.38801810145378113, |
| "learning_rate": 9.02653942855561e-05, |
| "loss": 0.0309, |
| "step": 2420 |
| }, |
| { |
| "grad_norm": 0.42541712522506714, |
| "learning_rate": 9.016714721043971e-05, |
| "loss": 0.027, |
| "step": 2430 |
| }, |
| { |
| "grad_norm": 0.42861104011535645, |
| "learning_rate": 9.006846087383675e-05, |
| "loss": 0.0274, |
| "step": 2440 |
| }, |
| { |
| "grad_norm": 0.44381630420684814, |
| "learning_rate": 8.996933635496523e-05, |
| "loss": 0.0264, |
| "step": 2450 |
| }, |
| { |
| "grad_norm": 0.5069416761398315, |
| "learning_rate": 8.986977473783498e-05, |
| "loss": 0.0243, |
| "step": 2460 |
| }, |
| { |
| "grad_norm": 0.5593004822731018, |
| "learning_rate": 8.97697771112359e-05, |
| "loss": 0.0266, |
| "step": 2470 |
| }, |
| { |
| "grad_norm": 0.49196624755859375, |
| "learning_rate": 8.966934456872602e-05, |
| "loss": 0.0254, |
| "step": 2480 |
| }, |
| { |
| "grad_norm": 0.42328518629074097, |
| "learning_rate": 8.95684782086195e-05, |
| "loss": 0.0317, |
| "step": 2490 |
| }, |
| { |
| "grad_norm": 0.40058237314224243, |
| "learning_rate": 8.946717913397476e-05, |
| "loss": 0.0257, |
| "step": 2500 |
| }, |
| { |
| "grad_norm": 0.45824214816093445, |
| "learning_rate": 8.93654484525822e-05, |
| "loss": 0.0267, |
| "step": 2510 |
| }, |
| { |
| "grad_norm": 0.47785720229148865, |
| "learning_rate": 8.926328727695226e-05, |
| "loss": 0.026, |
| "step": 2520 |
| }, |
| { |
| "grad_norm": 0.5189729928970337, |
| "learning_rate": 8.916069672430319e-05, |
| "loss": 0.0264, |
| "step": 2530 |
| }, |
| { |
| "grad_norm": 0.3164174556732178, |
| "learning_rate": 8.905767791654884e-05, |
| "loss": 0.0244, |
| "step": 2540 |
| }, |
| { |
| "grad_norm": 0.45995235443115234, |
| "learning_rate": 8.895423198028638e-05, |
| "loss": 0.0272, |
| "step": 2550 |
| }, |
| { |
| "grad_norm": 0.40050774812698364, |
| "learning_rate": 8.885036004678402e-05, |
| "loss": 0.0292, |
| "step": 2560 |
| }, |
| { |
| "grad_norm": 0.3644542098045349, |
| "learning_rate": 8.874606325196857e-05, |
| "loss": 0.0237, |
| "step": 2570 |
| }, |
| { |
| "grad_norm": 0.4536350667476654, |
| "learning_rate": 8.864134273641304e-05, |
| "loss": 0.025, |
| "step": 2580 |
| }, |
| { |
| "grad_norm": 0.35245734453201294, |
| "learning_rate": 8.853619964532427e-05, |
| "loss": 0.0233, |
| "step": 2590 |
| }, |
| { |
| "grad_norm": 0.4429668188095093, |
| "learning_rate": 8.843063512853019e-05, |
| "loss": 0.0285, |
| "step": 2600 |
| }, |
| { |
| "grad_norm": 0.43946343660354614, |
| "learning_rate": 8.832465034046749e-05, |
| "loss": 0.0263, |
| "step": 2610 |
| }, |
| { |
| "grad_norm": 0.4406358599662781, |
| "learning_rate": 8.821824644016882e-05, |
| "loss": 0.0254, |
| "step": 2620 |
| }, |
| { |
| "grad_norm": 0.48885712027549744, |
| "learning_rate": 8.811142459125019e-05, |
| "loss": 0.025, |
| "step": 2630 |
| }, |
| { |
| "grad_norm": 0.42471176385879517, |
| "learning_rate": 8.800418596189822e-05, |
| "loss": 0.0265, |
| "step": 2640 |
| }, |
| { |
| "grad_norm": 0.3454952836036682, |
| "learning_rate": 8.789653172485737e-05, |
| "loss": 0.0261, |
| "step": 2650 |
| }, |
| { |
| "grad_norm": 0.4365542232990265, |
| "learning_rate": 8.778846305741715e-05, |
| "loss": 0.0253, |
| "step": 2660 |
| }, |
| { |
| "grad_norm": 0.3438829779624939, |
| "learning_rate": 8.767998114139918e-05, |
| "loss": 0.0251, |
| "step": 2670 |
| }, |
| { |
| "grad_norm": 0.3312196433544159, |
| "learning_rate": 8.757108716314429e-05, |
| "loss": 0.0254, |
| "step": 2680 |
| }, |
| { |
| "grad_norm": 0.40338999032974243, |
| "learning_rate": 8.746178231349962e-05, |
| "loss": 0.0275, |
| "step": 2690 |
| }, |
| { |
| "grad_norm": 0.4243628978729248, |
| "learning_rate": 8.735206778780549e-05, |
| "loss": 0.0239, |
| "step": 2700 |
| }, |
| { |
| "grad_norm": 0.4020898938179016, |
| "learning_rate": 8.724194478588234e-05, |
| "loss": 0.0234, |
| "step": 2710 |
| }, |
| { |
| "grad_norm": 0.4327259361743927, |
| "learning_rate": 8.713141451201772e-05, |
| "loss": 0.0248, |
| "step": 2720 |
| }, |
| { |
| "grad_norm": 0.3352695107460022, |
| "learning_rate": 8.702047817495295e-05, |
| "loss": 0.0258, |
| "step": 2730 |
| }, |
| { |
| "grad_norm": 0.3333274722099304, |
| "learning_rate": 8.69091369878701e-05, |
| "loss": 0.0238, |
| "step": 2740 |
| }, |
| { |
| "grad_norm": 0.42753326892852783, |
| "learning_rate": 8.679739216837849e-05, |
| "loss": 0.0222, |
| "step": 2750 |
| }, |
| { |
| "grad_norm": 0.3095396161079407, |
| "learning_rate": 8.66852449385016e-05, |
| "loss": 0.0233, |
| "step": 2760 |
| }, |
| { |
| "grad_norm": 0.3271157741546631, |
| "learning_rate": 8.657269652466356e-05, |
| "loss": 0.0267, |
| "step": 2770 |
| }, |
| { |
| "grad_norm": 0.4156598150730133, |
| "learning_rate": 8.645974815767577e-05, |
| "loss": 0.0225, |
| "step": 2780 |
| }, |
| { |
| "grad_norm": 0.35358086228370667, |
| "learning_rate": 8.634640107272351e-05, |
| "loss": 0.023, |
| "step": 2790 |
| }, |
| { |
| "grad_norm": 0.43658044934272766, |
| "learning_rate": 8.623265650935234e-05, |
| "loss": 0.0256, |
| "step": 2800 |
| }, |
| { |
| "grad_norm": 0.39249366521835327, |
| "learning_rate": 8.611851571145456e-05, |
| "loss": 0.0256, |
| "step": 2810 |
| }, |
| { |
| "grad_norm": 0.4045146107673645, |
| "learning_rate": 8.600397992725566e-05, |
| "loss": 0.0265, |
| "step": 2820 |
| }, |
| { |
| "grad_norm": 0.46040022373199463, |
| "learning_rate": 8.588905040930061e-05, |
| "loss": 0.0223, |
| "step": 2830 |
| }, |
| { |
| "grad_norm": 0.4740993082523346, |
| "learning_rate": 8.577372841444022e-05, |
| "loss": 0.0238, |
| "step": 2840 |
| }, |
| { |
| "grad_norm": 0.5013309717178345, |
| "learning_rate": 8.565801520381736e-05, |
| "loss": 0.0244, |
| "step": 2850 |
| }, |
| { |
| "grad_norm": 0.5572243332862854, |
| "learning_rate": 8.554191204285313e-05, |
| "loss": 0.0278, |
| "step": 2860 |
| }, |
| { |
| "grad_norm": 0.4136684536933899, |
| "learning_rate": 8.542542020123315e-05, |
| "loss": 0.0268, |
| "step": 2870 |
| }, |
| { |
| "grad_norm": 0.4344552159309387, |
| "learning_rate": 8.530854095289347e-05, |
| "loss": 0.0237, |
| "step": 2880 |
| }, |
| { |
| "grad_norm": 0.44237616658210754, |
| "learning_rate": 8.519127557600688e-05, |
| "loss": 0.0258, |
| "step": 2890 |
| }, |
| { |
| "grad_norm": 0.40503379702568054, |
| "learning_rate": 8.507362535296871e-05, |
| "loss": 0.0245, |
| "step": 2900 |
| }, |
| { |
| "grad_norm": 0.4115789234638214, |
| "learning_rate": 8.495559157038299e-05, |
| "loss": 0.0228, |
| "step": 2910 |
| }, |
| { |
| "grad_norm": 0.47725898027420044, |
| "learning_rate": 8.483717551904823e-05, |
| "loss": 0.0255, |
| "step": 2920 |
| }, |
| { |
| "grad_norm": 0.39906537532806396, |
| "learning_rate": 8.47183784939434e-05, |
| "loss": 0.028, |
| "step": 2930 |
| }, |
| { |
| "grad_norm": 0.3738861083984375, |
| "learning_rate": 8.459920179421374e-05, |
| "loss": 0.0276, |
| "step": 2940 |
| }, |
| { |
| "grad_norm": 0.4364217519760132, |
| "learning_rate": 8.447964672315656e-05, |
| "loss": 0.0242, |
| "step": 2950 |
| }, |
| { |
| "grad_norm": 0.40446925163269043, |
| "learning_rate": 8.435971458820692e-05, |
| "loss": 0.0245, |
| "step": 2960 |
| }, |
| { |
| "grad_norm": 0.43057939410209656, |
| "learning_rate": 8.423940670092345e-05, |
| "loss": 0.0229, |
| "step": 2970 |
| }, |
| { |
| "grad_norm": 0.3856443464756012, |
| "learning_rate": 8.411872437697394e-05, |
| "loss": 0.0217, |
| "step": 2980 |
| }, |
| { |
| "grad_norm": 0.3364211320877075, |
| "learning_rate": 8.399766893612096e-05, |
| "loss": 0.0223, |
| "step": 2990 |
| }, |
| { |
| "grad_norm": 0.47767484188079834, |
| "learning_rate": 8.38762417022074e-05, |
| "loss": 0.0231, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|