| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.7923470139503479, |
| "learning_rate": 0.00019967935871743488, |
| "loss": 0.4612, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.5407691597938538, |
| "learning_rate": 0.00019887775551102204, |
| "loss": 0.2353, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.5324429869651794, |
| "learning_rate": 0.00019807615230460924, |
| "loss": 0.2157, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5783653259277344, |
| "learning_rate": 0.0001972745490981964, |
| "loss": 0.2017, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.47596609592437744, |
| "learning_rate": 0.00019647294589178357, |
| "loss": 0.1951, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.4201284945011139, |
| "learning_rate": 0.00019567134268537074, |
| "loss": 0.2002, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.4552278518676758, |
| "learning_rate": 0.00019486973947895793, |
| "loss": 0.1904, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.7762654423713684, |
| "learning_rate": 0.0001940681362725451, |
| "loss": 0.1789, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.48717886209487915, |
| "learning_rate": 0.00019326653306613227, |
| "loss": 0.1754, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5642560720443726, |
| "learning_rate": 0.00019246492985971943, |
| "loss": 0.1807, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.5994827151298523, |
| "learning_rate": 0.00019166332665330663, |
| "loss": 0.1865, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5326362252235413, |
| "learning_rate": 0.0001908617234468938, |
| "loss": 0.1845, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.8340044021606445, |
| "learning_rate": 0.00019006012024048096, |
| "loss": 0.1696, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.7146855592727661, |
| "learning_rate": 0.00018925851703406813, |
| "loss": 0.1844, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.5249161720275879, |
| "learning_rate": 0.00018845691382765532, |
| "loss": 0.1757, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.7083638310432434, |
| "learning_rate": 0.0001876553106212425, |
| "loss": 0.1918, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.40141209959983826, |
| "learning_rate": 0.00018685370741482966, |
| "loss": 0.1868, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.4578303098678589, |
| "learning_rate": 0.00018605210420841683, |
| "loss": 0.1809, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.49453985691070557, |
| "learning_rate": 0.00018525050100200402, |
| "loss": 0.1747, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.5069108009338379, |
| "learning_rate": 0.0001844488977955912, |
| "loss": 0.1781, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.6718438267707825, |
| "learning_rate": 0.00018364729458917838, |
| "loss": 0.1772, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.4564284682273865, |
| "learning_rate": 0.00018284569138276555, |
| "loss": 0.177, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.47330281138420105, |
| "learning_rate": 0.00018204408817635271, |
| "loss": 0.1715, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.4193236827850342, |
| "learning_rate": 0.0001812424849699399, |
| "loss": 0.1749, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5325985550880432, |
| "learning_rate": 0.00018044088176352708, |
| "loss": 0.18, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.5181522965431213, |
| "learning_rate": 0.00017963927855711424, |
| "loss": 0.1537, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.4463769197463989, |
| "learning_rate": 0.0001788376753507014, |
| "loss": 0.1432, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.5249760746955872, |
| "learning_rate": 0.00017803607214428858, |
| "loss": 0.1607, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.42788001894950867, |
| "learning_rate": 0.00017723446893787577, |
| "loss": 0.1473, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.6883480548858643, |
| "learning_rate": 0.00017643286573146294, |
| "loss": 0.1481, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.5434200167655945, |
| "learning_rate": 0.0001756312625250501, |
| "loss": 0.1557, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.770155668258667, |
| "learning_rate": 0.00017482965931863727, |
| "loss": 0.1397, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.6204828023910522, |
| "learning_rate": 0.00017402805611222447, |
| "loss": 0.1569, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.7150607705116272, |
| "learning_rate": 0.00017322645290581163, |
| "loss": 0.1523, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.5075599551200867, |
| "learning_rate": 0.0001724248496993988, |
| "loss": 0.1631, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.6147515177726746, |
| "learning_rate": 0.00017162324649298597, |
| "loss": 0.1476, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.6109094023704529, |
| "learning_rate": 0.00017082164328657316, |
| "loss": 0.1522, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.5690982341766357, |
| "learning_rate": 0.00017002004008016033, |
| "loss": 0.1555, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.44981974363327026, |
| "learning_rate": 0.0001692184368737475, |
| "loss": 0.1592, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.4784778952598572, |
| "learning_rate": 0.00016841683366733466, |
| "loss": 0.1636, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 0.5931491851806641, |
| "learning_rate": 0.00016761523046092186, |
| "loss": 0.152, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 0.663811206817627, |
| "learning_rate": 0.00016681362725450903, |
| "loss": 0.1704, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.4538479447364807, |
| "learning_rate": 0.0001660120240480962, |
| "loss": 0.1482, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.6272454261779785, |
| "learning_rate": 0.00016521042084168336, |
| "loss": 0.1545, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.5804396271705627, |
| "learning_rate": 0.00016440881763527055, |
| "loss": 0.1622, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.5440978407859802, |
| "learning_rate": 0.00016360721442885772, |
| "loss": 0.1607, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.6250975131988525, |
| "learning_rate": 0.0001628056112224449, |
| "loss": 0.1547, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.41538259387016296, |
| "learning_rate": 0.00016200400801603208, |
| "loss": 0.146, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.5187687277793884, |
| "learning_rate": 0.00016120240480961925, |
| "loss": 0.1663, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.392794132232666, |
| "learning_rate": 0.00016040080160320644, |
| "loss": 0.1492, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 0.5112284421920776, |
| "learning_rate": 0.0001595991983967936, |
| "loss": 0.1276, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 0.577057421207428, |
| "learning_rate": 0.00015879759519038078, |
| "loss": 0.118, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 0.4589303433895111, |
| "learning_rate": 0.00015799599198396794, |
| "loss": 0.1251, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.4399261176586151, |
| "learning_rate": 0.0001571943887775551, |
| "loss": 0.1174, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.47207799553871155, |
| "learning_rate": 0.0001563927855711423, |
| "loss": 0.1301, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 0.5883737802505493, |
| "learning_rate": 0.00015559118236472947, |
| "loss": 0.1285, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 0.5191317796707153, |
| "learning_rate": 0.00015478957915831664, |
| "loss": 0.1232, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.546741247177124, |
| "learning_rate": 0.0001539879759519038, |
| "loss": 0.1177, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 0.5307457447052002, |
| "learning_rate": 0.000153186372745491, |
| "loss": 0.1226, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.49284040927886963, |
| "learning_rate": 0.00015238476953907817, |
| "loss": 0.1398, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 0.46590954065322876, |
| "learning_rate": 0.00015158316633266534, |
| "loss": 0.1287, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 0.5511562824249268, |
| "learning_rate": 0.0001507815631262525, |
| "loss": 0.118, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 0.5350615978240967, |
| "learning_rate": 0.0001499799599198397, |
| "loss": 0.1295, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 0.5491256713867188, |
| "learning_rate": 0.00014917835671342686, |
| "loss": 0.129, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.5817452669143677, |
| "learning_rate": 0.00014837675350701403, |
| "loss": 0.1373, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.4481925964355469, |
| "learning_rate": 0.0001475751503006012, |
| "loss": 0.1389, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 0.5628023743629456, |
| "learning_rate": 0.0001467735470941884, |
| "loss": 0.1238, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 0.678969144821167, |
| "learning_rate": 0.00014597194388777556, |
| "loss": 0.1318, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 0.5662192106246948, |
| "learning_rate": 0.00014517034068136273, |
| "loss": 0.1242, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.532526433467865, |
| "learning_rate": 0.0001443687374749499, |
| "loss": 0.1279, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 0.4268568158149719, |
| "learning_rate": 0.00014356713426853706, |
| "loss": 0.1342, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 0.6674071550369263, |
| "learning_rate": 0.00014276553106212425, |
| "loss": 0.1374, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 0.5499129891395569, |
| "learning_rate": 0.00014196392785571142, |
| "loss": 0.1226, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 0.5989218354225159, |
| "learning_rate": 0.0001411623246492986, |
| "loss": 0.1282, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.5111316442489624, |
| "learning_rate": 0.00014036072144288576, |
| "loss": 0.1241, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.04, |
| "grad_norm": 0.4813462197780609, |
| "learning_rate": 0.00013955911823647295, |
| "loss": 0.0941, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.08, |
| "grad_norm": 0.5328338742256165, |
| "learning_rate": 0.00013875751503006014, |
| "loss": 0.0914, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.12, |
| "grad_norm": 0.5423403382301331, |
| "learning_rate": 0.0001379559118236473, |
| "loss": 0.0988, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.16, |
| "grad_norm": 0.6739408373832703, |
| "learning_rate": 0.00013715430861723448, |
| "loss": 0.0995, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.5204668045043945, |
| "learning_rate": 0.00013635270541082165, |
| "loss": 0.0973, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.24, |
| "grad_norm": 0.6864869594573975, |
| "learning_rate": 0.00013555110220440884, |
| "loss": 0.0911, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.2800000000000002, |
| "grad_norm": 0.587958037853241, |
| "learning_rate": 0.000134749498997996, |
| "loss": 0.103, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.32, |
| "grad_norm": 0.6141840219497681, |
| "learning_rate": 0.00013394789579158317, |
| "loss": 0.0925, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.36, |
| "grad_norm": 0.5647754073143005, |
| "learning_rate": 0.00013314629258517034, |
| "loss": 0.1056, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 0.5085099339485168, |
| "learning_rate": 0.00013234468937875754, |
| "loss": 0.1071, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.44, |
| "grad_norm": 0.6452666521072388, |
| "learning_rate": 0.0001315430861723447, |
| "loss": 0.1074, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.48, |
| "grad_norm": 0.5613518357276917, |
| "learning_rate": 0.00013074148296593187, |
| "loss": 0.1066, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.52, |
| "grad_norm": 0.5741850137710571, |
| "learning_rate": 0.00012993987975951904, |
| "loss": 0.1085, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.56, |
| "grad_norm": 0.63754802942276, |
| "learning_rate": 0.00012913827655310623, |
| "loss": 0.1053, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.5717872381210327, |
| "learning_rate": 0.0001283366733466934, |
| "loss": 0.1058, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.64, |
| "grad_norm": 0.5148450136184692, |
| "learning_rate": 0.00012753507014028056, |
| "loss": 0.1038, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.68, |
| "grad_norm": 0.5819758772850037, |
| "learning_rate": 0.00012673346693386773, |
| "loss": 0.1119, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.7199999999999998, |
| "grad_norm": 0.6014116406440735, |
| "learning_rate": 0.00012593186372745493, |
| "loss": 0.1081, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.76, |
| "grad_norm": 0.574530303478241, |
| "learning_rate": 0.0001251302605210421, |
| "loss": 0.1023, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 0.6252711415290833, |
| "learning_rate": 0.00012432865731462926, |
| "loss": 0.0977, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.84, |
| "grad_norm": 0.587404727935791, |
| "learning_rate": 0.00012352705410821643, |
| "loss": 0.0996, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.88, |
| "grad_norm": 0.47545889019966125, |
| "learning_rate": 0.0001227254509018036, |
| "loss": 0.105, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.92, |
| "grad_norm": 0.6459314823150635, |
| "learning_rate": 0.00012192384769539077, |
| "loss": 0.1003, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.96, |
| "grad_norm": 0.5450368523597717, |
| "learning_rate": 0.00012112224448897796, |
| "loss": 0.1124, |
| "step": 990 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.5874629616737366, |
| "learning_rate": 0.00012032064128256512, |
| "loss": 0.1089, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.04, |
| "grad_norm": 0.6516720056533813, |
| "learning_rate": 0.0001195190380761523, |
| "loss": 0.0842, |
| "step": 1010 |
| }, |
| { |
| "epoch": 4.08, |
| "grad_norm": 0.70643550157547, |
| "learning_rate": 0.00011871743486973947, |
| "loss": 0.075, |
| "step": 1020 |
| }, |
| { |
| "epoch": 4.12, |
| "grad_norm": 0.7798948287963867, |
| "learning_rate": 0.00011791583166332665, |
| "loss": 0.079, |
| "step": 1030 |
| }, |
| { |
| "epoch": 4.16, |
| "grad_norm": 0.7087392807006836, |
| "learning_rate": 0.00011711422845691385, |
| "loss": 0.0759, |
| "step": 1040 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 0.5795313715934753, |
| "learning_rate": 0.00011631262525050101, |
| "loss": 0.0781, |
| "step": 1050 |
| }, |
| { |
| "epoch": 4.24, |
| "grad_norm": 0.6577343940734863, |
| "learning_rate": 0.00011551102204408819, |
| "loss": 0.0802, |
| "step": 1060 |
| }, |
| { |
| "epoch": 4.28, |
| "grad_norm": 0.6984887719154358, |
| "learning_rate": 0.00011470941883767536, |
| "loss": 0.0745, |
| "step": 1070 |
| }, |
| { |
| "epoch": 4.32, |
| "grad_norm": 0.7415528893470764, |
| "learning_rate": 0.00011390781563126254, |
| "loss": 0.0776, |
| "step": 1080 |
| }, |
| { |
| "epoch": 4.36, |
| "grad_norm": 0.46346089243888855, |
| "learning_rate": 0.00011310621242484971, |
| "loss": 0.0793, |
| "step": 1090 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.5514031052589417, |
| "learning_rate": 0.00011230460921843689, |
| "loss": 0.0766, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.44, |
| "grad_norm": 0.5561518669128418, |
| "learning_rate": 0.00011150300601202406, |
| "loss": 0.069, |
| "step": 1110 |
| }, |
| { |
| "epoch": 4.48, |
| "grad_norm": 0.6889087557792664, |
| "learning_rate": 0.00011070140280561124, |
| "loss": 0.0809, |
| "step": 1120 |
| }, |
| { |
| "epoch": 4.52, |
| "grad_norm": 0.6279156804084778, |
| "learning_rate": 0.0001098997995991984, |
| "loss": 0.0684, |
| "step": 1130 |
| }, |
| { |
| "epoch": 4.5600000000000005, |
| "grad_norm": 0.4698103964328766, |
| "learning_rate": 0.00010909819639278558, |
| "loss": 0.0862, |
| "step": 1140 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 0.5812392234802246, |
| "learning_rate": 0.00010829659318637275, |
| "loss": 0.0827, |
| "step": 1150 |
| }, |
| { |
| "epoch": 4.64, |
| "grad_norm": 0.6872307658195496, |
| "learning_rate": 0.00010749498997995993, |
| "loss": 0.0768, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.68, |
| "grad_norm": 0.6259503364562988, |
| "learning_rate": 0.0001066933867735471, |
| "loss": 0.0824, |
| "step": 1170 |
| }, |
| { |
| "epoch": 4.72, |
| "grad_norm": 0.5298041701316833, |
| "learning_rate": 0.00010589178356713428, |
| "loss": 0.0679, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.76, |
| "grad_norm": 0.8088281750679016, |
| "learning_rate": 0.00010509018036072145, |
| "loss": 0.0805, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.49759751558303833, |
| "learning_rate": 0.00010428857715430861, |
| "loss": 0.0761, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.84, |
| "grad_norm": 0.4452104866504669, |
| "learning_rate": 0.0001034869739478958, |
| "loss": 0.0836, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.88, |
| "grad_norm": 0.4896758794784546, |
| "learning_rate": 0.00010268537074148296, |
| "loss": 0.0709, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.92, |
| "grad_norm": 0.6835769414901733, |
| "learning_rate": 0.00010188376753507014, |
| "loss": 0.0799, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.96, |
| "grad_norm": 0.7380080819129944, |
| "learning_rate": 0.00010108216432865731, |
| "loss": 0.0764, |
| "step": 1240 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.7432966828346252, |
| "learning_rate": 0.00010028056112224449, |
| "loss": 0.079, |
| "step": 1250 |
| }, |
| { |
| "epoch": 5.04, |
| "grad_norm": 0.6639225482940674, |
| "learning_rate": 9.947895791583167e-05, |
| "loss": 0.0554, |
| "step": 1260 |
| }, |
| { |
| "epoch": 5.08, |
| "grad_norm": 0.5844371914863586, |
| "learning_rate": 9.867735470941885e-05, |
| "loss": 0.0502, |
| "step": 1270 |
| }, |
| { |
| "epoch": 5.12, |
| "grad_norm": 0.4597128629684448, |
| "learning_rate": 9.787575150300602e-05, |
| "loss": 0.0596, |
| "step": 1280 |
| }, |
| { |
| "epoch": 5.16, |
| "grad_norm": 0.7378055453300476, |
| "learning_rate": 9.70741482965932e-05, |
| "loss": 0.0556, |
| "step": 1290 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 0.5333693027496338, |
| "learning_rate": 9.627254509018037e-05, |
| "loss": 0.0485, |
| "step": 1300 |
| }, |
| { |
| "epoch": 5.24, |
| "grad_norm": 0.51535564661026, |
| "learning_rate": 9.547094188376755e-05, |
| "loss": 0.0506, |
| "step": 1310 |
| }, |
| { |
| "epoch": 5.28, |
| "grad_norm": 0.5654531121253967, |
| "learning_rate": 9.466933867735471e-05, |
| "loss": 0.0556, |
| "step": 1320 |
| }, |
| { |
| "epoch": 5.32, |
| "grad_norm": 0.6834219694137573, |
| "learning_rate": 9.386773547094188e-05, |
| "loss": 0.0587, |
| "step": 1330 |
| }, |
| { |
| "epoch": 5.36, |
| "grad_norm": 0.5739651322364807, |
| "learning_rate": 9.306613226452906e-05, |
| "loss": 0.0533, |
| "step": 1340 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 0.5924126505851746, |
| "learning_rate": 9.226452905811623e-05, |
| "loss": 0.0566, |
| "step": 1350 |
| }, |
| { |
| "epoch": 5.44, |
| "grad_norm": 0.6210141181945801, |
| "learning_rate": 9.146292585170341e-05, |
| "loss": 0.058, |
| "step": 1360 |
| }, |
| { |
| "epoch": 5.48, |
| "grad_norm": 0.445516437292099, |
| "learning_rate": 9.066132264529058e-05, |
| "loss": 0.0547, |
| "step": 1370 |
| }, |
| { |
| "epoch": 5.52, |
| "grad_norm": 0.5603981614112854, |
| "learning_rate": 8.985971943887777e-05, |
| "loss": 0.0565, |
| "step": 1380 |
| }, |
| { |
| "epoch": 5.5600000000000005, |
| "grad_norm": 0.6049801707267761, |
| "learning_rate": 8.905811623246494e-05, |
| "loss": 0.0579, |
| "step": 1390 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 0.506960391998291, |
| "learning_rate": 8.825651302605212e-05, |
| "loss": 0.0493, |
| "step": 1400 |
| }, |
| { |
| "epoch": 5.64, |
| "grad_norm": 0.6527524590492249, |
| "learning_rate": 8.745490981963928e-05, |
| "loss": 0.0596, |
| "step": 1410 |
| }, |
| { |
| "epoch": 5.68, |
| "grad_norm": 0.6459997296333313, |
| "learning_rate": 8.665330661322647e-05, |
| "loss": 0.0512, |
| "step": 1420 |
| }, |
| { |
| "epoch": 5.72, |
| "grad_norm": 0.4932115375995636, |
| "learning_rate": 8.585170340681363e-05, |
| "loss": 0.0573, |
| "step": 1430 |
| }, |
| { |
| "epoch": 5.76, |
| "grad_norm": 0.7221292853355408, |
| "learning_rate": 8.50501002004008e-05, |
| "loss": 0.0605, |
| "step": 1440 |
| }, |
| { |
| "epoch": 5.8, |
| "grad_norm": 0.43518489599227905, |
| "learning_rate": 8.424849699398798e-05, |
| "loss": 0.0519, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.84, |
| "grad_norm": 0.7757974863052368, |
| "learning_rate": 8.344689378757515e-05, |
| "loss": 0.0567, |
| "step": 1460 |
| }, |
| { |
| "epoch": 5.88, |
| "grad_norm": 0.6453976631164551, |
| "learning_rate": 8.264529058116233e-05, |
| "loss": 0.0596, |
| "step": 1470 |
| }, |
| { |
| "epoch": 5.92, |
| "grad_norm": 0.6767393946647644, |
| "learning_rate": 8.18436873747495e-05, |
| "loss": 0.0597, |
| "step": 1480 |
| }, |
| { |
| "epoch": 5.96, |
| "grad_norm": 0.6432074904441833, |
| "learning_rate": 8.104208416833668e-05, |
| "loss": 0.0608, |
| "step": 1490 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.4562494456768036, |
| "learning_rate": 8.024048096192384e-05, |
| "loss": 0.0519, |
| "step": 1500 |
| }, |
| { |
| "epoch": 6.04, |
| "grad_norm": 0.5894383788108826, |
| "learning_rate": 7.943887775551102e-05, |
| "loss": 0.0376, |
| "step": 1510 |
| }, |
| { |
| "epoch": 6.08, |
| "grad_norm": 0.5043510794639587, |
| "learning_rate": 7.86372745490982e-05, |
| "loss": 0.0364, |
| "step": 1520 |
| }, |
| { |
| "epoch": 6.12, |
| "grad_norm": 0.753901481628418, |
| "learning_rate": 7.783567134268538e-05, |
| "loss": 0.0387, |
| "step": 1530 |
| }, |
| { |
| "epoch": 6.16, |
| "grad_norm": 0.5726089477539062, |
| "learning_rate": 7.703406813627255e-05, |
| "loss": 0.0362, |
| "step": 1540 |
| }, |
| { |
| "epoch": 6.2, |
| "grad_norm": 0.7004730701446533, |
| "learning_rate": 7.623246492985973e-05, |
| "loss": 0.0327, |
| "step": 1550 |
| }, |
| { |
| "epoch": 6.24, |
| "grad_norm": 0.522422194480896, |
| "learning_rate": 7.54308617234469e-05, |
| "loss": 0.0326, |
| "step": 1560 |
| }, |
| { |
| "epoch": 6.28, |
| "grad_norm": 0.7951876521110535, |
| "learning_rate": 7.462925851703407e-05, |
| "loss": 0.0396, |
| "step": 1570 |
| }, |
| { |
| "epoch": 6.32, |
| "grad_norm": 0.5453311800956726, |
| "learning_rate": 7.382765531062125e-05, |
| "loss": 0.0366, |
| "step": 1580 |
| }, |
| { |
| "epoch": 6.36, |
| "grad_norm": 0.7363042235374451, |
| "learning_rate": 7.302605210420841e-05, |
| "loss": 0.0403, |
| "step": 1590 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 0.7100352048873901, |
| "learning_rate": 7.22244488977956e-05, |
| "loss": 0.0392, |
| "step": 1600 |
| }, |
| { |
| "epoch": 6.44, |
| "grad_norm": 0.9634444713592529, |
| "learning_rate": 7.142284569138276e-05, |
| "loss": 0.0398, |
| "step": 1610 |
| }, |
| { |
| "epoch": 6.48, |
| "grad_norm": 0.6527013182640076, |
| "learning_rate": 7.062124248496994e-05, |
| "loss": 0.0369, |
| "step": 1620 |
| }, |
| { |
| "epoch": 6.52, |
| "grad_norm": 0.5168381333351135, |
| "learning_rate": 6.981963927855711e-05, |
| "loss": 0.0399, |
| "step": 1630 |
| }, |
| { |
| "epoch": 6.5600000000000005, |
| "grad_norm": 0.5673239827156067, |
| "learning_rate": 6.901803607214429e-05, |
| "loss": 0.0364, |
| "step": 1640 |
| }, |
| { |
| "epoch": 6.6, |
| "grad_norm": 0.4419175684452057, |
| "learning_rate": 6.821643286573146e-05, |
| "loss": 0.034, |
| "step": 1650 |
| }, |
| { |
| "epoch": 6.64, |
| "grad_norm": 0.5721241235733032, |
| "learning_rate": 6.741482965931865e-05, |
| "loss": 0.0407, |
| "step": 1660 |
| }, |
| { |
| "epoch": 6.68, |
| "grad_norm": 0.8612061738967896, |
| "learning_rate": 6.661322645290582e-05, |
| "loss": 0.0369, |
| "step": 1670 |
| }, |
| { |
| "epoch": 6.72, |
| "grad_norm": 0.6673168540000916, |
| "learning_rate": 6.581162324649299e-05, |
| "loss": 0.034, |
| "step": 1680 |
| }, |
| { |
| "epoch": 6.76, |
| "grad_norm": 0.816121518611908, |
| "learning_rate": 6.501002004008017e-05, |
| "loss": 0.0372, |
| "step": 1690 |
| }, |
| { |
| "epoch": 6.8, |
| "grad_norm": 0.5812684893608093, |
| "learning_rate": 6.420841683366733e-05, |
| "loss": 0.0379, |
| "step": 1700 |
| }, |
| { |
| "epoch": 6.84, |
| "grad_norm": 0.6159153580665588, |
| "learning_rate": 6.340681362725451e-05, |
| "loss": 0.0346, |
| "step": 1710 |
| }, |
| { |
| "epoch": 6.88, |
| "grad_norm": 0.809228241443634, |
| "learning_rate": 6.260521042084168e-05, |
| "loss": 0.0486, |
| "step": 1720 |
| }, |
| { |
| "epoch": 6.92, |
| "grad_norm": 0.6421244740486145, |
| "learning_rate": 6.180360721442886e-05, |
| "loss": 0.0387, |
| "step": 1730 |
| }, |
| { |
| "epoch": 6.96, |
| "grad_norm": 0.7321845889091492, |
| "learning_rate": 6.1002004008016036e-05, |
| "loss": 0.0395, |
| "step": 1740 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.7744494676589966, |
| "learning_rate": 6.020040080160321e-05, |
| "loss": 0.0434, |
| "step": 1750 |
| }, |
| { |
| "epoch": 7.04, |
| "grad_norm": 0.6629056334495544, |
| "learning_rate": 5.9398797595190384e-05, |
| "loss": 0.0229, |
| "step": 1760 |
| }, |
| { |
| "epoch": 7.08, |
| "grad_norm": 0.5707855224609375, |
| "learning_rate": 5.859719438877756e-05, |
| "loss": 0.0261, |
| "step": 1770 |
| }, |
| { |
| "epoch": 7.12, |
| "grad_norm": 0.5904133319854736, |
| "learning_rate": 5.7795591182364725e-05, |
| "loss": 0.0239, |
| "step": 1780 |
| }, |
| { |
| "epoch": 7.16, |
| "grad_norm": 0.4738862216472626, |
| "learning_rate": 5.69939879759519e-05, |
| "loss": 0.0205, |
| "step": 1790 |
| }, |
| { |
| "epoch": 7.2, |
| "grad_norm": 0.5075474381446838, |
| "learning_rate": 5.6192384769539086e-05, |
| "loss": 0.025, |
| "step": 1800 |
| }, |
| { |
| "epoch": 7.24, |
| "grad_norm": 0.5726251602172852, |
| "learning_rate": 5.539078156312626e-05, |
| "loss": 0.0247, |
| "step": 1810 |
| }, |
| { |
| "epoch": 7.28, |
| "grad_norm": 0.5094057321548462, |
| "learning_rate": 5.4589178356713434e-05, |
| "loss": 0.0189, |
| "step": 1820 |
| }, |
| { |
| "epoch": 7.32, |
| "grad_norm": 0.47997888922691345, |
| "learning_rate": 5.378757515030061e-05, |
| "loss": 0.0221, |
| "step": 1830 |
| }, |
| { |
| "epoch": 7.36, |
| "grad_norm": 0.5335679650306702, |
| "learning_rate": 5.298597194388778e-05, |
| "loss": 0.0239, |
| "step": 1840 |
| }, |
| { |
| "epoch": 7.4, |
| "grad_norm": 0.5913345217704773, |
| "learning_rate": 5.2184368737474955e-05, |
| "loss": 0.0237, |
| "step": 1850 |
| }, |
| { |
| "epoch": 7.44, |
| "grad_norm": 0.41921791434288025, |
| "learning_rate": 5.138276553106213e-05, |
| "loss": 0.0294, |
| "step": 1860 |
| }, |
| { |
| "epoch": 7.48, |
| "grad_norm": 0.610205352306366, |
| "learning_rate": 5.05811623246493e-05, |
| "loss": 0.0237, |
| "step": 1870 |
| }, |
| { |
| "epoch": 7.52, |
| "grad_norm": 0.5666776299476624, |
| "learning_rate": 4.977955911823648e-05, |
| "loss": 0.0236, |
| "step": 1880 |
| }, |
| { |
| "epoch": 7.5600000000000005, |
| "grad_norm": 0.42802920937538147, |
| "learning_rate": 4.897795591182365e-05, |
| "loss": 0.0239, |
| "step": 1890 |
| }, |
| { |
| "epoch": 7.6, |
| "grad_norm": 0.748887836933136, |
| "learning_rate": 4.8176352705410824e-05, |
| "loss": 0.0269, |
| "step": 1900 |
| }, |
| { |
| "epoch": 7.64, |
| "grad_norm": 0.43109023571014404, |
| "learning_rate": 4.7374749498998e-05, |
| "loss": 0.0246, |
| "step": 1910 |
| }, |
| { |
| "epoch": 7.68, |
| "grad_norm": 0.43403562903404236, |
| "learning_rate": 4.657314629258517e-05, |
| "loss": 0.0219, |
| "step": 1920 |
| }, |
| { |
| "epoch": 7.72, |
| "grad_norm": 0.5174989104270935, |
| "learning_rate": 4.5771543086172346e-05, |
| "loss": 0.0229, |
| "step": 1930 |
| }, |
| { |
| "epoch": 7.76, |
| "grad_norm": 0.5523115396499634, |
| "learning_rate": 4.496993987975952e-05, |
| "loss": 0.0242, |
| "step": 1940 |
| }, |
| { |
| "epoch": 7.8, |
| "grad_norm": 0.4592013359069824, |
| "learning_rate": 4.4168336673346694e-05, |
| "loss": 0.0237, |
| "step": 1950 |
| }, |
| { |
| "epoch": 7.84, |
| "grad_norm": 0.574874997138977, |
| "learning_rate": 4.336673346693387e-05, |
| "loss": 0.0207, |
| "step": 1960 |
| }, |
| { |
| "epoch": 7.88, |
| "grad_norm": 0.6085746884346008, |
| "learning_rate": 4.256513026052105e-05, |
| "loss": 0.0213, |
| "step": 1970 |
| }, |
| { |
| "epoch": 7.92, |
| "grad_norm": 0.5086420178413391, |
| "learning_rate": 4.176352705410822e-05, |
| "loss": 0.0234, |
| "step": 1980 |
| }, |
| { |
| "epoch": 7.96, |
| "grad_norm": 0.4850587844848633, |
| "learning_rate": 4.0961923847695396e-05, |
| "loss": 0.0227, |
| "step": 1990 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.469855934381485, |
| "learning_rate": 4.016032064128257e-05, |
| "loss": 0.026, |
| "step": 2000 |
| }, |
| { |
| "epoch": 8.04, |
| "grad_norm": 0.3705706000328064, |
| "learning_rate": 3.9358717434869744e-05, |
| "loss": 0.0156, |
| "step": 2010 |
| }, |
| { |
| "epoch": 8.08, |
| "grad_norm": 0.5124081969261169, |
| "learning_rate": 3.855711422845692e-05, |
| "loss": 0.0146, |
| "step": 2020 |
| }, |
| { |
| "epoch": 8.12, |
| "grad_norm": 0.3145318627357483, |
| "learning_rate": 3.7755511022044085e-05, |
| "loss": 0.0156, |
| "step": 2030 |
| }, |
| { |
| "epoch": 8.16, |
| "grad_norm": 0.47414782643318176, |
| "learning_rate": 3.6953907815631265e-05, |
| "loss": 0.015, |
| "step": 2040 |
| }, |
| { |
| "epoch": 8.2, |
| "grad_norm": 0.3775177597999573, |
| "learning_rate": 3.615230460921844e-05, |
| "loss": 0.0167, |
| "step": 2050 |
| }, |
| { |
| "epoch": 8.24, |
| "grad_norm": 0.43829330801963806, |
| "learning_rate": 3.535070140280561e-05, |
| "loss": 0.0135, |
| "step": 2060 |
| }, |
| { |
| "epoch": 8.28, |
| "grad_norm": 0.5022293329238892, |
| "learning_rate": 3.454909819639279e-05, |
| "loss": 0.013, |
| "step": 2070 |
| }, |
| { |
| "epoch": 8.32, |
| "grad_norm": 0.27599331736564636, |
| "learning_rate": 3.374749498997996e-05, |
| "loss": 0.0153, |
| "step": 2080 |
| }, |
| { |
| "epoch": 8.36, |
| "grad_norm": 0.82197105884552, |
| "learning_rate": 3.2945891783567135e-05, |
| "loss": 0.0164, |
| "step": 2090 |
| }, |
| { |
| "epoch": 8.4, |
| "grad_norm": 0.20758652687072754, |
| "learning_rate": 3.214428857715431e-05, |
| "loss": 0.0135, |
| "step": 2100 |
| }, |
| { |
| "epoch": 8.44, |
| "grad_norm": 0.29413196444511414, |
| "learning_rate": 3.134268537074149e-05, |
| "loss": 0.0122, |
| "step": 2110 |
| }, |
| { |
| "epoch": 8.48, |
| "grad_norm": 0.38946202397346497, |
| "learning_rate": 3.054108216432866e-05, |
| "loss": 0.0124, |
| "step": 2120 |
| }, |
| { |
| "epoch": 8.52, |
| "grad_norm": 0.3506149649620056, |
| "learning_rate": 2.9739478957915833e-05, |
| "loss": 0.0146, |
| "step": 2130 |
| }, |
| { |
| "epoch": 8.56, |
| "grad_norm": 0.31000277400016785, |
| "learning_rate": 2.8937875751503007e-05, |
| "loss": 0.0143, |
| "step": 2140 |
| }, |
| { |
| "epoch": 8.6, |
| "grad_norm": 0.3397505581378937, |
| "learning_rate": 2.813627254509018e-05, |
| "loss": 0.0146, |
| "step": 2150 |
| }, |
| { |
| "epoch": 8.64, |
| "grad_norm": 0.46700355410575867, |
| "learning_rate": 2.7334669338677355e-05, |
| "loss": 0.0148, |
| "step": 2160 |
| }, |
| { |
| "epoch": 8.68, |
| "grad_norm": 0.43519356846809387, |
| "learning_rate": 2.653306613226453e-05, |
| "loss": 0.0116, |
| "step": 2170 |
| }, |
| { |
| "epoch": 8.72, |
| "grad_norm": 0.3716731369495392, |
| "learning_rate": 2.5731462925851706e-05, |
| "loss": 0.0138, |
| "step": 2180 |
| }, |
| { |
| "epoch": 8.76, |
| "grad_norm": 0.43048301339149475, |
| "learning_rate": 2.4929859719438877e-05, |
| "loss": 0.0116, |
| "step": 2190 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 0.586083173751831, |
| "learning_rate": 2.4128256513026054e-05, |
| "loss": 0.0118, |
| "step": 2200 |
| }, |
| { |
| "epoch": 8.84, |
| "grad_norm": 0.5221779346466064, |
| "learning_rate": 2.3326653306613228e-05, |
| "loss": 0.013, |
| "step": 2210 |
| }, |
| { |
| "epoch": 8.88, |
| "grad_norm": 0.4493936598300934, |
| "learning_rate": 2.25250501002004e-05, |
| "loss": 0.0161, |
| "step": 2220 |
| }, |
| { |
| "epoch": 8.92, |
| "grad_norm": 0.629578173160553, |
| "learning_rate": 2.172344689378758e-05, |
| "loss": 0.0136, |
| "step": 2230 |
| }, |
| { |
| "epoch": 8.96, |
| "grad_norm": 0.445533812046051, |
| "learning_rate": 2.092184368737475e-05, |
| "loss": 0.0116, |
| "step": 2240 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.4184821546077728, |
| "learning_rate": 2.0120240480961923e-05, |
| "loss": 0.0108, |
| "step": 2250 |
| }, |
| { |
| "epoch": 9.04, |
| "grad_norm": 0.18857356905937195, |
| "learning_rate": 1.9318637274549097e-05, |
| "loss": 0.0088, |
| "step": 2260 |
| }, |
| { |
| "epoch": 9.08, |
| "grad_norm": 0.3190409541130066, |
| "learning_rate": 1.8517034068136274e-05, |
| "loss": 0.01, |
| "step": 2270 |
| }, |
| { |
| "epoch": 9.12, |
| "grad_norm": 0.23249320685863495, |
| "learning_rate": 1.7715430861723448e-05, |
| "loss": 0.0079, |
| "step": 2280 |
| }, |
| { |
| "epoch": 9.16, |
| "grad_norm": 0.29095596075057983, |
| "learning_rate": 1.6913827655310622e-05, |
| "loss": 0.0071, |
| "step": 2290 |
| }, |
| { |
| "epoch": 9.2, |
| "grad_norm": 0.2573925852775574, |
| "learning_rate": 1.6112224448897796e-05, |
| "loss": 0.0069, |
| "step": 2300 |
| }, |
| { |
| "epoch": 9.24, |
| "grad_norm": 0.14938637614250183, |
| "learning_rate": 1.531062124248497e-05, |
| "loss": 0.0098, |
| "step": 2310 |
| }, |
| { |
| "epoch": 9.28, |
| "grad_norm": 0.2534259855747223, |
| "learning_rate": 1.4509018036072145e-05, |
| "loss": 0.0082, |
| "step": 2320 |
| }, |
| { |
| "epoch": 9.32, |
| "grad_norm": 0.22516509890556335, |
| "learning_rate": 1.3707414829659317e-05, |
| "loss": 0.0085, |
| "step": 2330 |
| }, |
| { |
| "epoch": 9.36, |
| "grad_norm": 0.1505204439163208, |
| "learning_rate": 1.2905811623246495e-05, |
| "loss": 0.0071, |
| "step": 2340 |
| }, |
| { |
| "epoch": 9.4, |
| "grad_norm": 0.2739148736000061, |
| "learning_rate": 1.2104208416833669e-05, |
| "loss": 0.0096, |
| "step": 2350 |
| }, |
| { |
| "epoch": 9.44, |
| "grad_norm": 0.6390639543533325, |
| "learning_rate": 1.1302605210420842e-05, |
| "loss": 0.0072, |
| "step": 2360 |
| }, |
| { |
| "epoch": 9.48, |
| "grad_norm": 0.3169389069080353, |
| "learning_rate": 1.0501002004008016e-05, |
| "loss": 0.0069, |
| "step": 2370 |
| }, |
| { |
| "epoch": 9.52, |
| "grad_norm": 0.20216785371303558, |
| "learning_rate": 9.699398797595192e-06, |
| "loss": 0.0069, |
| "step": 2380 |
| }, |
| { |
| "epoch": 9.56, |
| "grad_norm": 0.21013762056827545, |
| "learning_rate": 8.897795591182364e-06, |
| "loss": 0.0084, |
| "step": 2390 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 0.21212315559387207, |
| "learning_rate": 8.09619238476954e-06, |
| "loss": 0.0062, |
| "step": 2400 |
| }, |
| { |
| "epoch": 9.64, |
| "grad_norm": 0.23686246573925018, |
| "learning_rate": 7.294589178356714e-06, |
| "loss": 0.0102, |
| "step": 2410 |
| }, |
| { |
| "epoch": 9.68, |
| "grad_norm": 0.2729778587818146, |
| "learning_rate": 6.492985971943888e-06, |
| "loss": 0.007, |
| "step": 2420 |
| }, |
| { |
| "epoch": 9.72, |
| "grad_norm": 0.19149982929229736, |
| "learning_rate": 5.691382765531062e-06, |
| "loss": 0.0087, |
| "step": 2430 |
| }, |
| { |
| "epoch": 9.76, |
| "grad_norm": 0.20065714418888092, |
| "learning_rate": 4.889779559118237e-06, |
| "loss": 0.0067, |
| "step": 2440 |
| }, |
| { |
| "epoch": 9.8, |
| "grad_norm": 0.29255425930023193, |
| "learning_rate": 4.0881763527054114e-06, |
| "loss": 0.008, |
| "step": 2450 |
| }, |
| { |
| "epoch": 9.84, |
| "grad_norm": 0.254088819026947, |
| "learning_rate": 3.2865731462925853e-06, |
| "loss": 0.0065, |
| "step": 2460 |
| }, |
| { |
| "epoch": 9.88, |
| "grad_norm": 0.2548205554485321, |
| "learning_rate": 2.4849699398797596e-06, |
| "loss": 0.0082, |
| "step": 2470 |
| }, |
| { |
| "epoch": 9.92, |
| "grad_norm": 0.2607119679450989, |
| "learning_rate": 1.6833667334669339e-06, |
| "loss": 0.0091, |
| "step": 2480 |
| }, |
| { |
| "epoch": 9.96, |
| "grad_norm": 0.2837667465209961, |
| "learning_rate": 8.817635270541082e-07, |
| "loss": 0.0083, |
| "step": 2490 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.24088458716869354, |
| "learning_rate": 8.016032064128256e-08, |
| "loss": 0.0089, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.17341887028272e+17, |
| "train_batch_size": 20, |
| "trial_name": null, |
| "trial_params": null |
| } |