{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9985842378480414,
  "eval_steps": 500,
  "global_step": 3177,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.009438414346389807, "grad_norm": 0.7047261682571029, "learning_rate": 2.0833333333333334e-06, "loss": 0.3368, "step": 10 },
    { "epoch": 0.018876828692779613, "grad_norm": 0.44565794909772116, "learning_rate": 4.166666666666667e-06, "loss": 0.2696, "step": 20 },
    { "epoch": 0.028315243039169418, "grad_norm": 0.2469001773100438, "learning_rate": 6.25e-06, "loss": 0.2345, "step": 30 },
    { "epoch": 0.037753657385559226, "grad_norm": 0.17504888016598083, "learning_rate": 8.333333333333334e-06, "loss": 0.213, "step": 40 },
    { "epoch": 0.04719207173194903, "grad_norm": 0.14319985578848382, "learning_rate": 1.0416666666666668e-05, "loss": 0.194, "step": 50 },
    { "epoch": 0.056630486078338836, "grad_norm": 0.1594795618687463, "learning_rate": 1.25e-05, "loss": 0.1871, "step": 60 },
    { "epoch": 0.06606890042472864, "grad_norm": 0.14625288121586646, "learning_rate": 1.4583333333333333e-05, "loss": 0.1791, "step": 70 },
    { "epoch": 0.07550731477111845, "grad_norm": 0.44536813060583275, "learning_rate": 1.6666666666666667e-05, "loss": 0.1777, "step": 80 },
    { "epoch": 0.08494572911750826, "grad_norm": 0.17858201185676476, "learning_rate": 1.8750000000000002e-05, "loss": 0.1743, "step": 90 },
    { "epoch": 0.09438414346389806, "grad_norm": 0.22679600482590775, "learning_rate": 1.9999916822524766e-05, "loss": 0.173, "step": 100 },
    { "epoch": 0.10382255781028787, "grad_norm": 0.2245846959065181, "learning_rate": 1.999898109181919e-05, "loss": 0.1697, "step": 110 },
    { "epoch": 0.11326097215667767, "grad_norm": 0.21274961909318216, "learning_rate": 1.9997005756177228e-05, "loss": 0.1669, "step": 120 },
    { "epoch": 0.12269938650306748, "grad_norm": 0.15535944242468744, "learning_rate": 1.999399102097668e-05, "loss": 0.1664, "step": 130 },
    { "epoch": 0.13213780084945728, "grad_norm": 0.1677351780597522, "learning_rate": 1.9989937199662845e-05, "loss": 0.1652, "step": 140 },
    { "epoch": 0.1415762151958471, "grad_norm": 0.15664753634679404, "learning_rate": 1.998484471371593e-05, "loss": 0.1619, "step": 150 },
    { "epoch": 0.1510146295422369, "grad_norm": 0.1936710831336268, "learning_rate": 1.9978714092607234e-05, "loss": 0.1606, "step": 160 },
    { "epoch": 0.16045304388862672, "grad_norm": 0.21595388620143963, "learning_rate": 1.9971545973744102e-05, "loss": 0.16, "step": 170 },
    { "epoch": 0.16989145823501653, "grad_norm": 0.19084712938417736, "learning_rate": 1.9963341102403652e-05, "loss": 0.1582, "step": 180 },
    { "epoch": 0.1793298725814063, "grad_norm": 0.19755743286804991, "learning_rate": 1.9954100331655265e-05, "loss": 0.1551, "step": 190 },
    { "epoch": 0.18876828692779613, "grad_norm": 0.21785901423412252, "learning_rate": 1.9943824622271934e-05, "loss": 0.1559, "step": 200 },
    { "epoch": 0.19820670127418594, "grad_norm": 0.1472275149396309, "learning_rate": 1.9932515042630335e-05, "loss": 0.1534, "step": 210 },
    { "epoch": 0.20764511562057575, "grad_norm": 0.14184865805893884, "learning_rate": 1.9920172768599763e-05, "loss": 0.1545, "step": 220 },
    { "epoch": 0.21708352996696556, "grad_norm": 0.1833363522186809, "learning_rate": 1.9906799083419865e-05, "loss": 0.1543, "step": 230 },
    { "epoch": 0.22652194431335534, "grad_norm": 0.141212536462394, "learning_rate": 1.989239537756723e-05, "loss": 0.1538, "step": 240 },
    { "epoch": 0.23596035865974516, "grad_norm": 0.1556507383209179, "learning_rate": 1.987696314861082e-05, "loss": 0.1541, "step": 250 },
    { "epoch": 0.24539877300613497, "grad_norm": 0.17173283637998002, "learning_rate": 1.986050400105626e-05, "loss": 0.1518, "step": 260 },
    { "epoch": 0.25483718735252475, "grad_norm": 0.150629748875022, "learning_rate": 1.9843019646179014e-05, "loss": 0.1501, "step": 270 },
    { "epoch": 0.26427560169891456, "grad_norm": 0.19334006281958221, "learning_rate": 1.9824511901846475e-05, "loss": 0.1483, "step": 280 },
    { "epoch": 0.2737140160453044, "grad_norm": 0.12527664802237196, "learning_rate": 1.9804982692328944e-05, "loss": 0.1514, "step": 290 },
    { "epoch": 0.2831524303916942, "grad_norm": 0.19858199254198736, "learning_rate": 1.9784434048099565e-05, "loss": 0.151, "step": 300 },
    { "epoch": 0.292590844738084, "grad_norm": 0.1677799134496006, "learning_rate": 1.976286810562323e-05, "loss": 0.1498, "step": 310 },
    { "epoch": 0.3020292590844738, "grad_norm": 0.15743349400071904, "learning_rate": 1.9740287107134417e-05, "loss": 0.1513, "step": 320 },
    { "epoch": 0.3114676734308636, "grad_norm": 0.1616061413079364, "learning_rate": 1.97166934004041e-05, "loss": 0.1489, "step": 330 },
    { "epoch": 0.32090608777725343, "grad_norm": 0.1878283137943562, "learning_rate": 1.9692089438495622e-05, "loss": 0.1449, "step": 340 },
    { "epoch": 0.33034450212364325, "grad_norm": 0.16166637953640253, "learning_rate": 1.9666477779509655e-05, "loss": 0.1469, "step": 350 },
    { "epoch": 0.33978291647003306, "grad_norm": 0.12292532976422958, "learning_rate": 1.963986108631823e-05, "loss": 0.1468, "step": 360 },
    { "epoch": 0.3492213308164228, "grad_norm": 0.16469874208354635, "learning_rate": 1.9612242126287876e-05, "loss": 0.1483, "step": 370 },
    { "epoch": 0.3586597451628126, "grad_norm": 0.13369950841636608, "learning_rate": 1.958362377099191e-05, "loss": 0.1443, "step": 380 },
    { "epoch": 0.36809815950920244, "grad_norm": 0.12551292002845085, "learning_rate": 1.9554008995911837e-05, "loss": 0.1463, "step": 390 },
    { "epoch": 0.37753657385559225, "grad_norm": 0.14851921568961518, "learning_rate": 1.9523400880128032e-05, "loss": 0.1471, "step": 400 },
    { "epoch": 0.38697498820198206, "grad_norm": 0.13162403368451694, "learning_rate": 1.949180260599957e-05, "loss": 0.1452, "step": 410 },
    { "epoch": 0.3964134025483719, "grad_norm": 0.12724887401811377, "learning_rate": 1.945921745883337e-05, "loss": 0.1455, "step": 420 },
    { "epoch": 0.4058518168947617, "grad_norm": 0.11779906396951238, "learning_rate": 1.9425648826542618e-05, "loss": 0.1435, "step": 430 },
    { "epoch": 0.4152902312411515, "grad_norm": 0.1610933457769652, "learning_rate": 1.939110019929451e-05, "loss": 0.1436, "step": 440 },
    { "epoch": 0.4247286455875413, "grad_norm": 0.12250958203512534, "learning_rate": 1.935557516914739e-05, "loss": 0.1451, "step": 450 },
    { "epoch": 0.4341670599339311, "grad_norm": 0.1380572524992858, "learning_rate": 1.931907742967727e-05, "loss": 0.1444, "step": 460 },
    { "epoch": 0.44360547428032093, "grad_norm": 0.13646993698111895, "learning_rate": 1.92816107755938e-05, "loss": 0.142, "step": 470 },
    { "epoch": 0.4530438886267107, "grad_norm": 0.11765542306036501, "learning_rate": 1.9243179102345753e-05, "loss": 0.1406, "step": 480 },
    { "epoch": 0.4624823029731005, "grad_norm": 0.1266567901893174, "learning_rate": 1.9203786405715984e-05, "loss": 0.144, "step": 490 },
    { "epoch": 0.4719207173194903, "grad_norm": 0.1113634311573256, "learning_rate": 1.9163436781405992e-05, "loss": 0.1428, "step": 500 },
    { "epoch": 0.4813591316658801, "grad_norm": 0.13808836428511967, "learning_rate": 1.912213442461009e-05, "loss": 0.1399, "step": 510 },
    { "epoch": 0.49079754601226994, "grad_norm": 0.1226613837593307, "learning_rate": 1.9079883629579224e-05, "loss": 0.1396, "step": 520 },
    { "epoch": 0.5002359603586597, "grad_norm": 0.14272835200919645, "learning_rate": 1.9036688789174496e-05, "loss": 0.1403, "step": 530 },
    { "epoch": 0.5096743747050495, "grad_norm": 0.12981510040553715, "learning_rate": 1.899255439441043e-05, "loss": 0.1399, "step": 540 },
    { "epoch": 0.5191127890514393, "grad_norm": 0.1190871345092575, "learning_rate": 1.8947485033988034e-05, "loss": 0.1376, "step": 550 },
    { "epoch": 0.5285512033978291, "grad_norm": 0.1271477738963388, "learning_rate": 1.8901485393817724e-05, "loss": 0.1415, "step": 560 },
    { "epoch": 0.5379896177442189, "grad_norm": 0.12965211048846748, "learning_rate": 1.8854560256532098e-05, "loss": 0.1423, "step": 570 },
    { "epoch": 0.5474280320906088, "grad_norm": 0.13373262160455968, "learning_rate": 1.880671450098871e-05, "loss": 0.139, "step": 580 },
    { "epoch": 0.5568664464369986, "grad_norm": 0.1322939697550499, "learning_rate": 1.8757953101762786e-05, "loss": 0.1396, "step": 590 },
    { "epoch": 0.5663048607833884, "grad_norm": 0.11918437239832326, "learning_rate": 1.8708281128630023e-05, "loss": 0.138, "step": 600 },
    { "epoch": 0.5757432751297782, "grad_norm": 0.12338738357381479, "learning_rate": 1.865770374603948e-05, "loss": 0.1406, "step": 610 },
    { "epoch": 0.585181689476168, "grad_norm": 0.11573754594906395, "learning_rate": 1.8606226212576612e-05, "loss": 0.138, "step": 620 },
    { "epoch": 0.5946201038225578, "grad_norm": 0.1419588706141848, "learning_rate": 1.8553853880416555e-05, "loss": 0.1408, "step": 630 },
    { "epoch": 0.6040585181689476, "grad_norm": 0.13998266637185536, "learning_rate": 1.8500592194767625e-05, "loss": 0.1394, "step": 640 },
    { "epoch": 0.6134969325153374, "grad_norm": 0.11868995175822014, "learning_rate": 1.8446446693305194e-05, "loss": 0.1384, "step": 650 },
    { "epoch": 0.6229353468617272, "grad_norm": 0.1328472026088287, "learning_rate": 1.8391423005595928e-05, "loss": 0.1393, "step": 660 },
    { "epoch": 0.6323737612081171, "grad_norm": 0.11726921800593894, "learning_rate": 1.833552685251246e-05, "loss": 0.1398, "step": 670 },
    { "epoch": 0.6418121755545069, "grad_norm": 0.11466260187649016, "learning_rate": 1.827876404563861e-05, "loss": 0.1369, "step": 680 },
    { "epoch": 0.6512505899008967, "grad_norm": 0.11234281014514101, "learning_rate": 1.8221140486665125e-05, "loss": 0.1346, "step": 690 },
    { "epoch": 0.6606890042472865, "grad_norm": 0.11159741277810285, "learning_rate": 1.8162662166776085e-05, "loss": 0.1357, "step": 700 },
    { "epoch": 0.6701274185936763, "grad_norm": 0.12752868267859116, "learning_rate": 1.8103335166026002e-05, "loss": 0.1389, "step": 710 },
    { "epoch": 0.6795658329400661, "grad_norm": 0.12084535348559353, "learning_rate": 1.804316565270765e-05, "loss": 0.1375, "step": 720 },
    { "epoch": 0.6890042472864559, "grad_norm": 0.12102077085461252, "learning_rate": 1.798215988271075e-05, "loss": 0.1364, "step": 730 },
    { "epoch": 0.6984426616328456, "grad_norm": 0.11713742692774234, "learning_rate": 1.7920324198871546e-05, "loss": 0.138, "step": 740 },
    { "epoch": 0.7078810759792354, "grad_norm": 0.11656355822805255, "learning_rate": 1.785766503031332e-05, "loss": 0.1346, "step": 750 },
    { "epoch": 0.7173194903256253, "grad_norm": 0.11377328844943654, "learning_rate": 1.7794188891777964e-05, "loss": 0.1352, "step": 760 },
    { "epoch": 0.7267579046720151, "grad_norm": 0.12103799646507679, "learning_rate": 1.7729902382948617e-05, "loss": 0.1353, "step": 770 },
    { "epoch": 0.7361963190184049, "grad_norm": 0.1073585292390918, "learning_rate": 1.76648121877635e-05, "loss": 0.1352, "step": 780 },
    { "epoch": 0.7456347333647947, "grad_norm": 0.11214075940260533, "learning_rate": 1.759892507372099e-05, "loss": 0.1341, "step": 790 },
    { "epoch": 0.7550731477111845, "grad_norm": 0.11706899066793994, "learning_rate": 1.7532247891175968e-05, "loss": 0.1333, "step": 800 },
    { "epoch": 0.7645115620575743, "grad_norm": 0.11789888505768062, "learning_rate": 1.746478757262761e-05, "loss": 0.136, "step": 810 },
    { "epoch": 0.7739499764039641, "grad_norm": 0.11237848535926774, "learning_rate": 1.739655113199858e-05, "loss": 0.1336, "step": 820 },
    { "epoch": 0.7833883907503539, "grad_norm": 0.10753154987431834, "learning_rate": 1.7327545663905813e-05, "loss": 0.1331, "step": 830 },
    { "epoch": 0.7928268050967437, "grad_norm": 0.1441522552747225, "learning_rate": 1.7257778342922853e-05, "loss": 0.1328, "step": 840 },
    { "epoch": 0.8022652194431336, "grad_norm": 0.1269707942863234, "learning_rate": 1.7187256422833928e-05, "loss": 0.1319, "step": 850 },
    { "epoch": 0.8117036337895234, "grad_norm": 0.11091236494221275, "learning_rate": 1.711598723587975e-05, "loss": 0.1324, "step": 860 },
    { "epoch": 0.8211420481359132, "grad_norm": 0.10854265306012167, "learning_rate": 1.7043978191995177e-05, "loss": 0.1325, "step": 870 },
    { "epoch": 0.830580462482303, "grad_norm": 0.1110467712060928, "learning_rate": 1.6971236778038806e-05, "loss": 0.1315, "step": 880 },
    { "epoch": 0.8400188768286928, "grad_norm": 0.12129611756408008, "learning_rate": 1.6897770557014535e-05, "loss": 0.1328, "step": 890 },
    { "epoch": 0.8494572911750826, "grad_norm": 0.106781748696916, "learning_rate": 1.682358716728525e-05, "loss": 0.1351, "step": 900 },
    { "epoch": 0.8588957055214724, "grad_norm": 0.11020118439519076, "learning_rate": 1.674869432177864e-05, "loss": 0.1325, "step": 910 },
    { "epoch": 0.8683341198678622, "grad_norm": 0.11750342557908768, "learning_rate": 1.667309980718529e-05, "loss": 0.1312, "step": 920 },
    { "epoch": 0.877772534214252, "grad_norm": 0.12853148875033116, "learning_rate": 1.6596811483149077e-05, "loss": 0.1317, "step": 930 },
    { "epoch": 0.8872109485606419, "grad_norm": 0.11393786746070304, "learning_rate": 1.651983728145e-05, "loss": 0.1355, "step": 940 },
    { "epoch": 0.8966493629070316, "grad_norm": 0.11013100846033319, "learning_rate": 1.6442185205179507e-05, "loss": 0.1309, "step": 950 },
    { "epoch": 0.9060877772534214, "grad_norm": 0.10641286141618259, "learning_rate": 1.6363863327908405e-05, "loss": 0.1339, "step": 960 },
    { "epoch": 0.9155261915998112, "grad_norm": 0.11308702339858176, "learning_rate": 1.6284879792847433e-05, "loss": 0.1299, "step": 970 },
    { "epoch": 0.924964605946201, "grad_norm": 0.11315250527471539, "learning_rate": 1.620524281200062e-05, "loss": 0.1305, "step": 980 },
    { "epoch": 0.9344030202925908, "grad_norm": 0.0972875949252274, "learning_rate": 1.6124960665311447e-05, "loss": 0.1322, "step": 990 },
    { "epoch": 0.9438414346389806, "grad_norm": 0.10070713866086979, "learning_rate": 1.6044041699802005e-05, "loss": 0.129, "step": 1000 },
    { "epoch": 0.9532798489853704, "grad_norm": 0.11109074990403733, "learning_rate": 1.5962494328705123e-05, "loss": 0.1321, "step": 1010 },
    { "epoch": 0.9627182633317602, "grad_norm": 0.1199186598391774, "learning_rate": 1.588032703058964e-05, "loss": 0.1334, "step": 1020 },
    { "epoch": 0.9721566776781501, "grad_norm": 0.10777396066893469, "learning_rate": 1.5797548348478893e-05, "loss": 0.1325, "step": 1030 },
    { "epoch": 0.9815950920245399, "grad_norm": 0.11999882098060052, "learning_rate": 1.571416688896246e-05, "loss": 0.132, "step": 1040 },
    { "epoch": 0.9910335063709297, "grad_norm": 0.10911342083469809, "learning_rate": 1.563019132130136e-05, "loss": 0.1301, "step": 1050 },
    { "epoch": 1.0004719207173194, "grad_norm": 0.11482143235010223, "learning_rate": 1.5545630376526665e-05, "loss": 0.1282, "step": 1060 },
    { "epoch": 1.0099103350637093, "grad_norm": 0.11699392564682201, "learning_rate": 1.5460492846531748e-05, "loss": 0.1142, "step": 1070 },
    { "epoch": 1.019348749410099, "grad_norm": 0.09530170230868218, "learning_rate": 1.5374787583158188e-05, "loss": 0.1157, "step": 1080 },
    { "epoch": 1.028787163756489, "grad_norm": 0.09486822390799159, "learning_rate": 1.5288523497275392e-05, "loss": 0.1143, "step": 1090 },
    { "epoch": 1.0382255781028786, "grad_norm": 0.09531194088354993, "learning_rate": 1.5201709557854178e-05, "loss": 0.1128, "step": 1100 },
    { "epoch": 1.0476639924492686, "grad_norm": 0.11206649112299381, "learning_rate": 1.5114354791034225e-05, "loss": 0.1161, "step": 1110 },
    { "epoch": 1.0571024067956583, "grad_norm": 0.10196697892456508, "learning_rate": 1.5026468279185615e-05, "loss": 0.1159, "step": 1120 },
    { "epoch": 1.0665408211420482, "grad_norm": 0.10326390731228648, "learning_rate": 1.4938059159964555e-05, "loss": 0.1161, "step": 1130 },
    { "epoch": 1.0759792354884379, "grad_norm": 0.09969628777021497, "learning_rate": 1.4849136625363297e-05, "loss": 0.1141, "step": 1140 },
    { "epoch": 1.0854176498348278, "grad_norm": 0.0939091288214549, "learning_rate": 1.4759709920754453e-05, "loss": 0.1125, "step": 1150 },
    { "epoch": 1.0948560641812175, "grad_norm": 0.09506323252337912, "learning_rate": 1.4669788343929736e-05, "loss": 0.1141, "step": 1160 },
    { "epoch": 1.1042944785276074, "grad_norm": 0.10154441851850998, "learning_rate": 1.4579381244133265e-05, "loss": 0.1128, "step": 1170 },
    { "epoch": 1.1137328928739971, "grad_norm": 0.11822773479215737, "learning_rate": 1.4488498021089514e-05, "loss": 0.1137, "step": 1180 },
    { "epoch": 1.123171307220387, "grad_norm": 0.1009800661484683, "learning_rate": 1.4397148124025997e-05, "loss": 0.1143, "step": 1190 },
    { "epoch": 1.1326097215667768, "grad_norm": 0.10126420585679885, "learning_rate": 1.4305341050690845e-05, "loss": 0.117, "step": 1200 },
    { "epoch": 1.1420481359131667, "grad_norm": 0.09588874040008494, "learning_rate": 1.421308634636529e-05, "loss": 0.1137, "step": 1210 },
    { "epoch": 1.1514865502595564, "grad_norm": 0.10922875430592754, "learning_rate": 1.412039360287126e-05, "loss": 0.1145, "step": 1220 },
    { "epoch": 1.1609249646059463, "grad_norm": 0.11298890031757797, "learning_rate": 1.4027272457574082e-05, "loss": 0.1138, "step": 1230 },
    { "epoch": 1.170363378952336, "grad_norm": 0.10857815603466196, "learning_rate": 1.3933732592380485e-05, "loss": 0.1135, "step": 1240 },
    { "epoch": 1.1798017932987257, "grad_norm": 0.10213279825434321, "learning_rate": 1.3839783732731966e-05, "loss": 0.1134, "step": 1250 },
    { "epoch": 1.1892402076451156, "grad_norm": 0.10027833977041692, "learning_rate": 1.3745435646593613e-05, "loss": 0.1136, "step": 1260 },
    { "epoch": 1.1986786219915055, "grad_norm": 0.09590585357482817, "learning_rate": 1.3650698143438534e-05, "loss": 0.113, "step": 1270 },
    { "epoch": 1.2081170363378952, "grad_norm": 0.10321073236266613, "learning_rate": 1.3555581073227942e-05, "loss": 0.1167, "step": 1280 },
    { "epoch": 1.217555450684285, "grad_norm": 0.09327710523878686, "learning_rate": 1.346009432538705e-05, "loss": 0.1147, "step": 1290 },
    { "epoch": 1.2269938650306749, "grad_norm": 0.0933025296287067, "learning_rate": 1.3364247827776854e-05, "loss": 0.1145, "step": 1300 },
    { "epoch": 1.2364322793770646, "grad_norm": 0.09493771082819326, "learning_rate": 1.3268051545661937e-05, "loss": 0.1141, "step": 1310 },
    { "epoch": 1.2458706937234545, "grad_norm": 0.10053484854502866, "learning_rate": 1.3171515480674342e-05, "loss": 0.1122, "step": 1320 },
    { "epoch": 1.2553091080698442, "grad_norm": 0.1108335770631105, "learning_rate": 1.3074649669773716e-05, "loss": 0.1173, "step": 1330 },
    { "epoch": 1.2647475224162341, "grad_norm": 0.10521299166726314, "learning_rate": 1.297746418420374e-05, "loss": 0.1103, "step": 1340 },
    { "epoch": 1.2741859367626238, "grad_norm": 0.10478814209881943, "learning_rate": 1.2879969128445025e-05, "loss": 0.1122, "step": 1350 },
    { "epoch": 1.2836243511090137, "grad_norm": 0.0969588608522638, "learning_rate": 1.2782174639164528e-05, "loss": 0.1112, "step": 1360 },
    { "epoch": 1.2930627654554034, "grad_norm": 0.10783687256200783, "learning_rate": 1.2684090884161636e-05, "loss": 0.1125, "step": 1370 },
    { "epoch": 1.3025011798017934, "grad_norm": 0.10076692580892369, "learning_rate": 1.2585728061311003e-05, "loss": 0.1107, "step": 1380 },
    { "epoch": 1.311939594148183, "grad_norm": 0.09895358395270354, "learning_rate": 1.248709639750228e-05, "loss": 0.1122, "step": 1390 },
    { "epoch": 1.321378008494573, "grad_norm": 0.10215738990006902, "learning_rate": 1.2388206147576796e-05, "loss": 0.1124, "step": 1400 },
    { "epoch": 1.3308164228409627, "grad_norm": 0.09611638665301472, "learning_rate": 1.2289067593261358e-05, "loss": 0.1151, "step": 1410 },
    { "epoch": 1.3402548371873526, "grad_norm": 0.09899073401009041, "learning_rate": 1.2189691042099265e-05, "loss": 0.1124, "step": 1420 },
    { "epoch": 1.3496932515337423, "grad_norm": 0.1157109248340035, "learning_rate": 1.209008682637859e-05, "loss": 0.1154, "step": 1430 },
    { "epoch": 1.359131665880132, "grad_norm": 0.09358448441833775, "learning_rate": 1.1990265302057948e-05, "loss": 0.1127, "step": 1440 },
    { "epoch": 1.368570080226522, "grad_norm": 0.10318474117014907, "learning_rate": 1.1890236847689762e-05, "loss": 0.1134, "step": 1450 },
    { "epoch": 1.3780084945729119, "grad_norm": 0.10507403584009326, "learning_rate": 1.1790011863341197e-05, "loss": 0.1145, "step": 1460 },
    { "epoch": 1.3874469089193016, "grad_norm": 0.09210160678217687, "learning_rate": 1.1689600769512855e-05, "loss": 0.1128, "step": 1470 },
    { "epoch": 1.3968853232656913, "grad_norm": 0.09890271495599744, "learning_rate": 1.1589014006055337e-05, "loss": 0.1158, "step": 1480 },
    { "epoch": 1.4063237376120812, "grad_norm": 0.09768042621781026, "learning_rate": 1.1488262031083816e-05, "loss": 0.1107, "step": 1490 },
    { "epoch": 1.415762151958471, "grad_norm": 0.09501221720590393, "learning_rate": 1.1387355319890685e-05, "loss": 0.1138, "step": 1500 },
    { "epoch": 1.4252005663048608, "grad_norm": 0.09098951194154016, "learning_rate": 1.1286304363856418e-05, "loss": 0.112, "step": 1510 },
    { "epoch": 1.4346389806512505, "grad_norm": 0.08856225785605727, "learning_rate": 1.1185119669358792e-05, "loss": 0.1137, "step": 1520 },
    { "epoch": 1.4440773949976404, "grad_norm": 0.09111265321801558, "learning_rate": 1.1083811756680523e-05, "loss": 0.1093, "step": 1530 },
    { "epoch": 1.4535158093440301, "grad_norm": 0.09329783759350743, "learning_rate": 1.0982391158915441e-05, "loss": 0.1138, "step": 1540 },
    { "epoch": 1.46295422369042, "grad_norm": 0.09114905230583735, "learning_rate": 1.0880868420873375e-05, "loss": 0.1135, "step": 1550 },
    { "epoch": 1.4723926380368098, "grad_norm": 0.10049302399783284, "learning_rate": 1.0779254097983788e-05, "loss": 0.1104, "step": 1560 },
    { "epoch": 1.4818310523831997, "grad_norm": 0.08811790258148439, "learning_rate": 1.0677558755198327e-05, "loss": 0.114, "step": 1570 },
    { "epoch": 1.4912694667295894, "grad_norm": 0.09402068008649977, "learning_rate": 1.0575792965892349e-05, "loss": 0.1112, "step": 1580 },
    { "epoch": 1.500707881075979, "grad_norm": 0.09144293350756144, "learning_rate": 1.0473967310765629e-05, "loss": 0.1099, "step": 1590 },
    { "epoch": 1.510146295422369, "grad_norm": 0.08887643971229772, "learning_rate": 1.0372092376742247e-05, "loss": 0.1109, "step": 1600 },
    { "epoch": 1.519584709768759, "grad_norm": 0.09042876745354687, "learning_rate": 1.0270178755869861e-05, "loss": 0.1123, "step": 1610 },
    { "epoch": 1.5290231241151486, "grad_norm": 0.08799872003450031, "learning_rate": 1.0168237044218452e-05, "loss": 0.1088, "step": 1620 },
    { "epoch": 1.5384615384615383, "grad_norm": 0.08558681617619375, "learning_rate": 1.0066277840778626e-05, "loss": 0.1125, "step": 1630 },
    { "epoch": 1.5478999528079282, "grad_norm": 0.08917702916102198, "learning_rate": 9.964311746359631e-06, "loss": 0.1078, "step": 1640 },
    { "epoch": 1.5573383671543182, "grad_norm": 0.09365014825092945, "learning_rate": 9.862349362487172e-06, "loss": 0.108, "step": 1650 },
    { "epoch": 1.5667767815007079, "grad_norm": 0.08881624424784158, "learning_rate": 9.760401290301164e-06, "loss": 0.1073, "step": 1660 },
    { "epoch": 1.5762151958470976, "grad_norm": 0.09040629927788134, "learning_rate": 9.658478129453532e-06, "loss": 0.1095, "step": 1670 },
    { "epoch": 1.5856536101934875, "grad_norm": 0.09668654356646131, "learning_rate": 9.556590477006123e-06, "loss": 0.109, "step": 1680 },
    { "epoch": 1.5950920245398774, "grad_norm": 0.08945527030759594, "learning_rate": 9.454748926328962e-06, "loss": 0.1111, "step": 1690 },
    { "epoch": 1.6045304388862671, "grad_norm": 0.09096700102455489, "learning_rate": 9.352964065998801e-06, "loss": 0.1091, "step": 1700 },
    { "epoch": 1.6139688532326568, "grad_norm": 0.098227701202526, "learning_rate": 9.251246478698242e-06, "loss": 0.1124, "step": 1710 },
    { "epoch": 1.6234072675790467, "grad_norm": 0.08702267003826049, "learning_rate": 9.149606740115444e-06, "loss": 0.1091, "step": 1720 },
    { "epoch": 1.6328456819254367, "grad_norm": 0.0912741057496456, "learning_rate": 9.04805541784454e-06, "loss": 0.1084, "step": 1730 },
    { "epoch": 1.6422840962718264, "grad_norm": 0.09473199957469115, "learning_rate": 8.946603070286926e-06, "loss": 0.1071, "step": 1740 },
    { "epoch": 1.651722510618216, "grad_norm": 0.09786418318351309, "learning_rate": 8.845260245553493e-06, "loss": 0.1106, "step": 1750 },
    { "epoch": 1.661160924964606, "grad_norm": 0.09376970400308367, "learning_rate": 8.744037480367922e-06, "loss": 0.1095, "step": 1760 },
    { "epoch": 1.670599339310996, "grad_norm": 0.09927967174299174, "learning_rate": 8.642945298971168e-06, "loss": 0.1086, "step": 1770 },
    { "epoch": 1.6800377536573856, "grad_norm": 0.08941325714107755, "learning_rate": 8.54199421202726e-06, "loss": 0.1096, "step": 1780 },
    { "epoch": 1.6894761680037753, "grad_norm": 0.09076378690689199, "learning_rate": 8.441194715530472e-06, "loss": 0.111, "step": 1790 },
    { "epoch": 1.6989145823501652, "grad_norm": 0.08350889813597084, "learning_rate": 8.340557289714055e-06, "loss": 0.1089, "step": 1800 },
    { "epoch": 1.708352996696555, "grad_norm": 0.1102767062542333, "learning_rate": 8.240092397960601e-06, "loss": 0.1077, "step": 1810 },
    { "epoch": 1.7177914110429446, "grad_norm": 0.0928092514701947, "learning_rate": 8.139810485714142e-06, "loss": 0.109, "step": 1820 },
    { "epoch": 1.7272298253893346, "grad_norm": 0.08827741386544802, "learning_rate": 8.03972197939414e-06, "loss": 0.1103, "step": 1830 },
    { "epoch": 1.7366682397357245, "grad_norm": 0.09074532027658296, "learning_rate": 7.939837285311425e-06, "loss": 0.106, "step": 1840 },
    { "epoch": 1.7461066540821142, "grad_norm": 0.08863060213083432, "learning_rate": 7.840166788586244e-06, "loss": 0.1111, "step": 1850 },
    { "epoch": 1.7555450684285039, "grad_norm": 0.0873194235737418, "learning_rate": 7.740720852068524e-06, "loss": 0.1107, "step": 1860 },
    { "epoch": 1.7649834827748938, "grad_norm": 0.09463118593541094, "learning_rate": 7.641509815260412e-06, "loss": 0.1067, "step": 1870 },
    { "epoch": 1.7744218971212837, "grad_norm": 0.08277999614215281, "learning_rate": 7.542543993241278e-06, "loss": 0.1092, "step": 1880 },
    { "epoch": 1.7838603114676734, "grad_norm": 0.08350764579820083, "learning_rate": 7.443833675595254e-06, "loss": 0.1033, "step": 1890 },
    { "epoch": 1.7932987258140631, "grad_norm": 0.0892168369121696, "learning_rate": 7.3453891253413935e-06, "loss": 0.1088, "step": 1900 },
    { "epoch": 1.802737140160453, "grad_norm": 0.09068391166704463, "learning_rate": 7.247220577866625e-06, "loss": 0.1074, "step": 1910 },
    { "epoch": 1.812175554506843, "grad_norm": 0.09208367707491026, "learning_rate": 7.149338239861579e-06, "loss": 0.1069, "step": 1920 },
    { "epoch": 1.8216139688532327, "grad_norm": 0.09334448658561058, "learning_rate": 7.051752288259366e-06, "loss": 0.1051, "step": 1930 },
    { "epoch": 1.8310523831996224, "grad_norm": 0.0867013966015152, "learning_rate": 6.954472869177479e-06, "loss": 0.1071, "step": 1940 },
    { "epoch": 1.8404907975460123, "grad_norm": 0.08513824070105314, "learning_rate": 6.857510096862901e-06, "loss": 0.108, "step": 1950 },
    { "epoch": 1.8499292118924022, "grad_norm": 0.08880515925379688, "learning_rate": 6.760874052640494e-06, "loss": 0.1081, "step": 1960 },
    { "epoch": 1.859367626238792, "grad_norm": 0.09395118992476542, "learning_rate": 6.664574783864862e-06, "loss": 0.1079, "step": 1970 },
    { "epoch": 1.8688060405851816, "grad_norm": 0.09253843263366972, "learning_rate": 6.568622302875682e-06, "loss": 0.1068, "step": 1980 },
    { "epoch": 1.8782444549315715, "grad_norm": 0.09103342170778085, "learning_rate": 6.473026585956736e-06, "loss": 0.106, "step": 1990 },
    { "epoch": 1.8876828692779613, "grad_norm": 0.08266259287550586, "learning_rate": 6.377797572298661e-06, "loss": 0.1076, "step": 2000 },
    { "epoch": 1.897121283624351, "grad_norm": 0.08360823987901486, "learning_rate": 6.282945162965548e-06, "loss": 0.1079, "step": 2010 },
    { "epoch": 1.9065596979707409, "grad_norm": 0.09004863363551058, "learning_rate": 6.188479219865529e-06, "loss": 0.1064, "step": 2020 },
    { "epoch": 1.9159981123171308, "grad_norm": 0.0931263680297109, "learning_rate": 6.094409564725435e-06, "loss": 0.1054, "step": 2030 },
    { "epoch": 1.9254365266635205, "grad_norm": 0.09051504504706874, "learning_rate": 6.0007459780695885e-06, "loss": 0.1082, "step": 2040 },
    { "epoch": 1.9348749410099102, "grad_norm": 0.0904194328886728, "learning_rate": 5.907498198202939e-06, "loss": 0.1081, "step": 2050 },
    { "epoch": 1.9443133553563001, "grad_norm": 0.08711490496456058, "learning_rate": 5.8146759201985525e-06, "loss": 0.1069, "step": 2060 },
    { "epoch": 1.95375176970269, "grad_norm": 0.08686808156213838, "learning_rate": 5.722288794889603e-06, "loss": 0.1064, "step": 2070 },
    { "epoch": 1.9631901840490797, "grad_norm": 0.08817123970600604, "learning_rate": 5.630346427865965e-06, "loss": 0.1045, "step": 2080 },
    { "epoch": 1.9726285983954694, "grad_norm": 0.08339231796976605, "learning_rate": 5.538858378475508e-06, "loss": 0.1066, "step": 2090 },
    { "epoch": 1.9820670127418594, "grad_norm": 0.08824324960180577, "learning_rate": 5.447834158830202e-06, "loss": 0.1037, "step": 2100 },
    { "epoch": 1.9915054270882493, "grad_norm": 0.09311929590923752, "learning_rate": 5.357283232817147e-06, "loss": 0.1054, "step": 2110 },
    { "epoch": 2.0009438414346388, "grad_norm": 0.1334431719993605, "learning_rate": 5.267215015114574e-06, "loss": 0.1031, "step": 2120 },
    { "epoch": 2.0103822557810287, "grad_norm": 0.10361861757914052, "learning_rate": 5.177638870213008e-06, "loss": 0.0868, "step": 2130 },
    { "epoch": 2.0198206701274186, "grad_norm": 0.084172782289755, "learning_rate": 5.088564111441645e-06, "loss": 0.0834, "step": 2140 },
    { "epoch": 2.0292590844738085, "grad_norm": 0.08727410339549913, "learning_rate": 5.000000000000003e-06, "loss": 0.0852, "step": 2150 },
    { "epoch": 2.038697498820198, "grad_norm": 0.08994662846331379, "learning_rate": 4.911955743995042e-06, "loss": 0.0845, "step": 2160 },
    { "epoch": 2.048135913166588, "grad_norm": 0.08715835430152602, "learning_rate": 4.824440497483802e-06, "loss": 0.0847, "step": 2170 },
    { "epoch": 2.057574327512978, "grad_norm": 0.0943214139106939, "learning_rate": 4.737463359521618e-06, "loss": 0.0845, "step": 2180 },
    { "epoch": 2.067012741859368, "grad_norm": 0.09049982951899538, "learning_rate": 4.6510333732160915e-06, "loss": 0.085, "step": 2190 },
    { "epoch": 2.0764511562057573, "grad_norm": 0.08823142440331375, "learning_rate": 4.565159524786888e-06, "loss": 0.0867, "step": 2200 },
    { "epoch": 2.085889570552147, "grad_norm": 0.08727162151071796, "learning_rate": 4.479850742631396e-06, "loss": 0.0834, "step": 2210 },
    { "epoch": 2.095327984898537, "grad_norm": 0.08243182067038303, "learning_rate": 4.395115896396457e-06, "loss": 0.0849, "step": 2220 },
    { "epoch": 2.104766399244927, "grad_norm": 0.08720723034547441, "learning_rate": 4.310963796056168e-06, "loss": 0.084, "step": 2230 },
    { "epoch": 2.1142048135913165, "grad_norm": 0.08588933137845103, "learning_rate": 4.227403190995901e-06, "loss": 0.0875, "step": 2240 },
    { "epoch": 2.1236432279377064, "grad_norm": 0.09200761679022347, "learning_rate": 4.14444276910263e-06, "loss": 0.0853, "step": 2250 },
    { "epoch": 2.1330816422840964, "grad_norm": 0.08831298949051568, "learning_rate": 4.06209115586162e-06, "loss": 0.0867, "step": 2260 },
    { "epoch": 2.1425200566304863, "grad_norm": 0.0893828115241757, "learning_rate": 3.980356913459642e-06, "loss": 0.0865, "step": 2270 },
    { "epoch": 2.1519584709768758, "grad_norm": 0.09119946740323005, "learning_rate": 3.899248539894756e-06, "loss": 0.0848, "step": 2280 },
    { "epoch": 2.1613968853232657, "grad_norm": 0.08882008929472095, "learning_rate": 3.818774468092754e-06, "loss": 0.0843, "step": 2290 },
    { "epoch": 2.1708352996696556, "grad_norm": 0.08922739717614447, "learning_rate": 3.738943065030376e-06, "loss": 0.0811, "step": 2300 },
    { "epoch": 2.180273714016045, "grad_norm": 0.08622476744102522, "learning_rate": 3.659762630865411e-06, "loss": 0.083, "step": 2310 },
    { "epoch": 2.189712128362435, "grad_norm": 0.08399251697806781, "learning_rate": 3.5812413980736916e-06, "loss": 0.0827, "step": 2320 },
    { "epoch": 2.199150542708825, "grad_norm": 0.09006770579644241, "learning_rate": 3.5033875305931662e-06, "loss": 0.0849, "step": 2330 },
    { "epoch": 2.208588957055215, "grad_norm": 0.08747795506814363, "learning_rate": 3.4262091229750973e-06, "loss": 0.0822, "step": 2340 },
    { "epoch": 2.2180273714016043, "grad_norm": 0.08840099962821243, "learning_rate": 3.3497141995424397e-06, "loss": 0.0835, "step": 2350 },
    { "epoch": 2.2274657857479943, "grad_norm": 0.08986511740506226, "learning_rate": 3.2739107135555603e-06, "loss": 0.0841, "step": 2360 },
    { "epoch": 2.236904200094384, "grad_norm": 0.08924913381765429, "learning_rate": 3.1988065463853204e-06, "loss": 0.0849, "step": 2370 },
    { "epoch": 2.246342614440774, "grad_norm": 0.08622955784811655, "learning_rate": 3.1244095066936396e-06, "loss": 0.0848, "step": 2380 },
    { "epoch": 2.2557810287871636, "grad_norm": 0.08819044440789944, "learning_rate": 3.050727329621637e-06, "loss": 0.0835, "step": 2390 },
    { "epoch": 2.2652194431335535, "grad_norm": 0.08900963097417651, "learning_rate": 2.977767675985377e-06, "loss": 0.0805, "step": 2400 },
    { "epoch": 2.2746578574799434, "grad_norm": 0.0889820260869723, "learning_rate": 2.905538131479376e-06, "loss": 0.0844, "step": 2410 },
    { "epoch": 2.2840962718263333, "grad_norm": 0.08508956855605507, "learning_rate": 2.8340462058879214e-06, "loss": 0.082, "step": 2420 },
    { "epoch": 2.293534686172723, "grad_norm": 0.0859290434178059, "learning_rate": 2.76329933230425e-06, "loss": 0.0819, "step": 2430 },
    { "epoch": 2.3029731005191127, "grad_norm": 0.08310233373376713, "learning_rate": 2.6933048663577297e-06, "loss": 0.0811, "step": 2440 },
    { "epoch": 2.3124115148655027, "grad_norm": 0.08745596999299074, "learning_rate": 2.6240700854490988e-06, "loss": 0.0824, "step": 2450 },
    { "epoch": 2.3218499292118926, "grad_norm": 0.08451577327209396, "learning_rate": 2.5556021879938074e-06, "loss": 0.0828, "step": 2460 },
    { "epoch": 2.331288343558282, "grad_norm": 0.09129945645731877, "learning_rate": 2.4879082926735974e-06, "loss": 0.0837, "step": 2470 },
    { "epoch": 2.340726757904672, "grad_norm": 0.083910957600724, "learning_rate": 2.4209954376963797e-06, "loss": 0.0816, "step": 2480 },
    { "epoch": 2.350165172251062, "grad_norm": 0.08447673558716574, "learning_rate": 2.354870580064439e-06, "loss": 0.0808, "step": 2490 },
    { "epoch": 2.3596035865974514, "grad_norm": 0.08671014168934867, "learning_rate": 2.289540594851122e-06, "loss": 0.0814, "step": 2500 },
    { "epoch": 2.3690420009438413, "grad_norm": 0.08155772855747594, "learning_rate": 2.225012274486028e-06, "loss": 0.0791, "step": 2510 },
    { "epoch": 2.3784804152902312, "grad_norm": 0.08555794455636312, "learning_rate": 2.1612923280487883e-06, "loss": 0.0843, "step": 2520 },
    { "epoch": 2.387918829636621, "grad_norm": 0.08566290155470521, "learning_rate": 2.0983873805715216e-06, "loss": 0.0837, "step": 2530 },
    { "epoch": 2.397357243983011, "grad_norm": 0.08531077534490512, "learning_rate": 2.0363039723500155e-06, "loss": 0.0838, "step": 2540 },
    { "epoch": 2.4067956583294006, "grad_norm": 0.08375539552341336, "learning_rate": 1.9750485582637245e-06, "loss": 0.0822, "step": 2550 },
    { "epoch": 2.4162340726757905, "grad_norm": 0.08503293134727295, "learning_rate": 1.9146275071046626e-06, "loss": 0.0849, "step": 2560 },
    { "epoch": 2.4256724870221804, "grad_norm": 0.08165485153496227, "learning_rate": 1.8550471009152138e-06, "loss": 0.0803, "step": 2570 },
    { "epoch": 2.43511090136857, "grad_norm": 0.08309504205099255, "learning_rate": 1.7963135343349914e-06, "loss": 0.0789, "step": 2580 },
    { "epoch": 2.44454931571496, "grad_norm": 0.08294260063335909, "learning_rate": 1.73843291395678e-06, "loss": 0.0823, "step": 2590 },
    { "epoch": 2.4539877300613497, "grad_norm": 0.08537750016844735, "learning_rate": 1.6814112576916142e-06, "loss": 0.0825, "step": 2600 },
    { "epoch": 2.4634261444077397, "grad_norm": 0.08433197560779272, "learning_rate": 1.6252544941430982e-06, "loss": 0.0813, "step": 2610 },
    { "epoch": 2.472864558754129, "grad_norm": 0.08321923421106348, "learning_rate": 1.5699684619909983e-06, "loss": 0.0826, "step": 2620 },
    { "epoch": 2.482302973100519, "grad_norm": 0.08373732680479463, "learning_rate": 1.5155589093841939e-06, "loss": 0.0802, "step": 2630 },
    { "epoch": 2.491741387446909, "grad_norm": 0.09112397953039225, "learning_rate": 1.4620314933430269e-06, "loss": 0.081, "step": 2640 },
    { "epoch": 2.501179801793299, "grad_norm": 0.08246005822696881, "learning_rate": 1.4093917791711497e-06, "loss": 0.0808, "step": 2650 },
    { "epoch": 2.5106182161396884, "grad_norm": 0.08399111524479627, "learning_rate": 1.357645239876879e-06, "loss": 0.08, "step": 2660 },
    { "epoch": 2.5200566304860783, "grad_norm": 0.08520663335129833, "learning_rate": 1.3067972556041753e-06, "loss": 0.0818, "step": 2670 },
    { "epoch": 2.5294950448324682, "grad_norm": 0.08299784168003543, "learning_rate": 1.2568531130732498e-06, "loss": 0.0807, "step": 2680 },
    { "epoch": 2.5389334591788577, "grad_norm": 0.08594453806774255, "learning_rate": 1.207818005030904e-06, "loss": 0.0802, "step": 2690 },
    { "epoch": 2.5483718735252476, "grad_norm": 0.08039958287229476, "learning_rate": 1.1596970297106458e-06, "loss": 0.0818, "step": 2700 },
    { "epoch": 2.5578102878716376, "grad_norm": 0.08085895400758615, "learning_rate": 1.1124951903025981e-06, "loss": 0.0806, "step": 2710 },
    { "epoch": 2.5672487022180275, "grad_norm": 0.08458215660632902, "learning_rate": 1.0662173944333288e-06, "loss": 0.081, "step": 2720 },
    { "epoch": 2.5766871165644174, "grad_norm": 0.0826446041832577, "learning_rate": 1.0208684536555968e-06, "loss": 0.081, "step": 2730 },
    { "epoch": 2.586125530910807, "grad_norm": 0.08442284791430928, "learning_rate": 9.764530829480822e-07, "loss": 0.0832, "step": 2740 },
    { "epoch": 2.595563945257197, "grad_norm": 0.08336636617446731, "learning_rate": 9.329759002251726e-07, "loss": 0.0802, "step": 2750 },
    { "epoch": 2.6050023596035867, "grad_norm": 0.0846992268529094, "learning_rate": 8.904414258568306e-07, "loss": 0.0799, "step": 2760 },
    { "epoch": 2.614440773949976, "grad_norm": 0.08300701320734614, "learning_rate": 8.488540821986035e-07, "loss": 0.0827, "step": 2770 },
    { "epoch": 2.623879188296366, "grad_norm": 0.08443465909555346, "learning_rate": 8.082181931318311e-07, "loss": 0.0792, "step": 2780 },
    { "epoch": 2.633317602642756, "grad_norm": 0.08476186997139065, "learning_rate": 7.685379836140872e-07, "loss": 0.079, "step": 2790 },
    { "epoch": 2.642756016989146, "grad_norm": 0.08472149270611445, "learning_rate": 7.298175792398976e-07, "loss": 0.0818, "step": 2800 },
    { "epoch": 2.652194431335536, "grad_norm": 0.08032205169166948, "learning_rate": 6.920610058118105e-07, "loss": 0.0804, "step": 2810 },
    { "epoch": 2.6616328456819254, "grad_norm": 0.08367697987102687, "learning_rate": 6.552721889218194e-07, "loss": 0.0816, "step": 2820 },
    { "epoch": 2.6710712600283153, "grad_norm": 0.08668064800964889, "learning_rate": 6.194549535432137e-07, "loss": 0.08, "step": 2830 },
    { "epoch": 2.680509674374705, "grad_norm": 0.0828804628433115, "learning_rate": 5.846130236329073e-07, "loss": 0.0823, "step": 2840 },
    { "epoch": 2.6899480887210947, "grad_norm": 0.08378018124842795, "learning_rate": 5.507500217442341e-07, "loss": 0.0809, "step": 2850 },
    { "epoch": 2.6993865030674846, "grad_norm": 0.08220414182971268, "learning_rate": 5.178694686503205e-07, "loss": 0.0784, "step": 2860 },
    { "epoch": 2.7088249174138745, "grad_norm": 0.08076213129882256, "learning_rate": 4.85974782978027e-07, "loss": 0.081, "step": 2870 },
    { "epoch": 2.718263331760264, "grad_norm": 0.08189865863600006, "learning_rate": 4.5506928085250033e-07, "loss": 0.0778, "step": 2880 },
    { "epoch": 2.727701746106654, "grad_norm": 0.08777777733586507, "learning_rate": 4.251561755524036e-07, "loss": 0.0832, "step": 2890 },
    { "epoch": 2.737140160453044, "grad_norm": 0.08438414185840341, "learning_rate": 3.9623857717581813e-07, "loss": 0.0808, "step": 2900 },
    { "epoch": 2.746578574799434, "grad_norm": 0.08245920785308986, "learning_rate": 3.6831949231689203e-07, "loss": 0.081, "step": 2910 },
    { "epoch": 2.7560169891458237, "grad_norm": 0.08356304284398074, "learning_rate": 3.414018237532335e-07, "loss": 0.0821, "step": 2920 },
    { "epoch": 2.765455403492213, "grad_norm": 0.08633702580648989, "learning_rate": 3.154883701441136e-07, "loss": 0.08, "step": 2930 },
    { "epoch": 2.774893817838603, "grad_norm": 0.0821243443006684, "learning_rate": 2.905818257394799e-07, "loss": 0.0798, "step": 2940 },
    { "epoch": 2.784332232184993, "grad_norm": 0.08233971677127812, "learning_rate": 2.666847800998362e-07, "loss": 0.0819, "step": 2950 },
    { "epoch": 2.7937706465313825, "grad_norm": 0.08176593022884952, "learning_rate": 2.437997178270035e-07, "loss": 0.0807, "step": 2960 },
    { "epoch": 2.8032090608777724, "grad_norm": 0.08485236840414098, "learning_rate": 2.219290183057865e-07, "loss": 0.0806, "step": 2970 },
    { "epoch": 2.8126474752241624, "grad_norm": 0.08134044819035506, "learning_rate": 2.0107495545659829e-07, "loss": 0.0778, "step": 2980 },
    { "epoch": 2.8220858895705523, "grad_norm": 0.08200978384685277, "learning_rate": 1.8123969749902714e-07, "loss": 0.0777, "step": 2990 },
    { "epoch": 2.831524303916942, "grad_norm": 0.08409107175117113, "learning_rate": 1.6242530672641143e-07, "loss": 0.0813, "step": 3000 },
    { "epoch": 2.8409627182633317, "grad_norm": 0.08288246905003016, "learning_rate": 1.4463373929141766e-07, "loss": 0.0788, "step": 3010 },
    { "epoch": 2.8504011326097216, "grad_norm": 0.08212980978941326, "learning_rate": 1.2786684500265546e-07, "loss": 0.0819, "step": 3020 },
    { "epoch": 2.8598395469561115, "grad_norm": 0.0832626354470357, "learning_rate": 1.1212636713235581e-07, "loss": 0.0794, "step": 3030 },
    { "epoch": 2.869277961302501, "grad_norm": 0.0836378792472762, "learning_rate": 9.741394223512057e-08, "loss": 0.0814, "step": 3040 },
    { "epoch": 2.878716375648891, "grad_norm": 0.08017963643030881, "learning_rate": 8.373109997776185e-08, "loss": 0.0804, "step": 3050 },
    { "epoch": 2.888154789995281, "grad_norm": 0.08105963755024889, "learning_rate": 7.10792629802659e-08, "loss": 0.08, "step": 3060 },
    { "epoch": 2.8975932043416703, "grad_norm": 0.08196798527897163, "learning_rate": 5.945974666788479e-08, "loss": 0.0809, "step": 3070 },
    { "epoch": 2.9070316186880603, "grad_norm": 0.08445010118063752, "learning_rate": 4.887375913436132e-08, "loss": 0.0817, "step": 3080 },
    { "epoch": 2.91647003303445, "grad_norm": 0.0830340739185833, "learning_rate": 3.932240101633178e-08, "loss": 0.0823, "step": 3090 },
    { "epoch": 2.92590844738084, "grad_norm": 0.08107816362561618, "learning_rate": 3.0806665378884106e-08, "loss": 0.0764, "step": 3100 },
    { "epoch": 2.93534686172723, "grad_norm": 0.08498347611137054, "learning_rate": 2.33274376123116e-08, "loss": 0.0784, "step": 3110 },
    { "epoch": 2.9447852760736195, "grad_norm": 0.08214101571060746, "learning_rate": 1.68854953400599e-08, "loss": 0.0795, "step": 3120 },
    { "epoch": 2.9542236904200094, "grad_norm": 0.0849230883706386, "learning_rate": 1.1481508337869429e-08, "loss": 0.0801, "step": 3130 },
    { "epoch": 2.9636621047663994, "grad_norm": 0.09022687172778407, "learning_rate": 7.1160384641455475e-09, "loss": 0.0828, "step": 3140 },
    { "epoch": 2.973100519112789, "grad_norm": 0.08399540827662202, "learning_rate": 3.7895396015374955e-09, "loss": 0.0808, "step": 3150 },
    { "epoch": 2.9825389334591788, "grad_norm": 0.08027494442622467, "learning_rate": 1.502357609749483e-09, "loss": 0.0803, "step": 3160 },
    { "epoch": 2.9919773478055687, "grad_norm": 0.08480380742956374, "learning_rate": 2.5473028957945234e-10, "loss": 0.0813, "step": 3170 },
    { "epoch": 2.9985842378480414, "step": 3177, "total_flos": 5.875342281120154e+16, "train_loss": 0.11385704865424108, "train_runtime": 162519.6807, "train_samples_per_second": 3.441, "train_steps_per_second": 0.02 }
  ],
  "logging_steps": 10,
  "max_steps": 3177,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.875342281120154e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}