| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0282, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002, |
| "grad_norm": 0.992521345615387, |
| "learning_rate": 1.8e-07, |
| "loss": 0.4669, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 2.713249444961548, |
| "learning_rate": 3.8e-07, |
| "loss": 0.46, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 6.9339165687561035, |
| "learning_rate": 5.800000000000001e-07, |
| "loss": 0.4543, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 1.508054494857788, |
| "learning_rate": 7.8e-07, |
| "loss": 0.4686, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.1499743461608887, |
| "learning_rate": 9.800000000000001e-07, |
| "loss": 0.4627, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 2.000009298324585, |
| "learning_rate": 1.1800000000000001e-06, |
| "loss": 0.4668, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 2.90346097946167, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": 0.4672, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 1.8733686208724976, |
| "learning_rate": 1.5800000000000001e-06, |
| "loss": 0.442, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 7.299347400665283, |
| "learning_rate": 1.7800000000000001e-06, |
| "loss": 0.4537, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.2114304304122925, |
| "learning_rate": 1.98e-06, |
| "loss": 0.4392, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 4.244724273681641, |
| "learning_rate": 2.1800000000000003e-06, |
| "loss": 0.4467, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 1.6745773553848267, |
| "learning_rate": 2.38e-06, |
| "loss": 0.3962, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 2.175783395767212, |
| "learning_rate": 2.5800000000000003e-06, |
| "loss": 0.3613, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 1.944590449333191, |
| "learning_rate": 2.7800000000000005e-06, |
| "loss": 0.267, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.9925225377082825, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 0.2213, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 1.1854593753814697, |
| "learning_rate": 3.1800000000000005e-06, |
| "loss": 0.2109, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 1.1748063564300537, |
| "learning_rate": 3.3800000000000007e-06, |
| "loss": 0.2003, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 1.274276852607727, |
| "learning_rate": 3.58e-06, |
| "loss": 0.1888, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 1.1525161266326904, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 0.1868, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 1.7498775720596313, |
| "learning_rate": 3.980000000000001e-06, |
| "loss": 0.1841, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 0.8628786206245422, |
| "learning_rate": 4.18e-06, |
| "loss": 0.1798, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 2.097649097442627, |
| "learning_rate": 4.38e-06, |
| "loss": 0.1746, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 3.168224811553955, |
| "learning_rate": 4.58e-06, |
| "loss": 0.1718, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 1.1456739902496338, |
| "learning_rate": 4.78e-06, |
| "loss": 0.1761, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.7046711444854736, |
| "learning_rate": 4.980000000000001e-06, |
| "loss": 0.1715, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 1.0530240535736084, |
| "learning_rate": 5.18e-06, |
| "loss": 0.1646, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 1.4000672101974487, |
| "learning_rate": 5.380000000000001e-06, |
| "loss": 0.169, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 0.9671304821968079, |
| "learning_rate": 5.580000000000001e-06, |
| "loss": 0.1658, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 4.116588115692139, |
| "learning_rate": 5.78e-06, |
| "loss": 0.1633, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.6324472427368164, |
| "learning_rate": 5.98e-06, |
| "loss": 0.1686, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 1.066884160041809, |
| "learning_rate": 6.18e-06, |
| "loss": 0.1578, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.9248769879341125, |
| "learning_rate": 6.380000000000001e-06, |
| "loss": 0.1648, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 2.3996987342834473, |
| "learning_rate": 6.5800000000000005e-06, |
| "loss": 0.1584, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 2.0868091583251953, |
| "learning_rate": 6.780000000000001e-06, |
| "loss": 0.1497, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 3.3375916481018066, |
| "learning_rate": 6.98e-06, |
| "loss": 0.1576, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 1.1031235456466675, |
| "learning_rate": 7.180000000000001e-06, |
| "loss": 0.1588, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 6.909502983093262, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 0.1505, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 1.235683560371399, |
| "learning_rate": 7.58e-06, |
| "loss": 0.1465, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 1.2270852327346802, |
| "learning_rate": 7.78e-06, |
| "loss": 0.1486, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.0695114135742188, |
| "learning_rate": 7.980000000000002e-06, |
| "loss": 0.1525, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 2.8632586002349854, |
| "learning_rate": 8.18e-06, |
| "loss": 0.147, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 1.0436551570892334, |
| "learning_rate": 8.380000000000001e-06, |
| "loss": 0.1445, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 1.166566014289856, |
| "learning_rate": 8.580000000000001e-06, |
| "loss": 0.1542, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 1.5501545667648315, |
| "learning_rate": 8.78e-06, |
| "loss": 0.1512, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.8002007007598877, |
| "learning_rate": 8.98e-06, |
| "loss": 0.1416, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 0.8401817083358765, |
| "learning_rate": 9.180000000000002e-06, |
| "loss": 0.1517, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 1.2032701969146729, |
| "learning_rate": 9.38e-06, |
| "loss": 0.1451, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.753799319267273, |
| "learning_rate": 9.58e-06, |
| "loss": 0.1401, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 1.1318485736846924, |
| "learning_rate": 9.780000000000001e-06, |
| "loss": 0.1378, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.041433572769165, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.1459, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 2.9767746925354004, |
| "learning_rate": 9.999901304280686e-06, |
| "loss": 0.1432, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.9433181881904602, |
| "learning_rate": 9.999560138895238e-06, |
| "loss": 0.1441, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 2.254661798477173, |
| "learning_rate": 9.99897530200195e-06, |
| "loss": 0.1411, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 1.0642566680908203, |
| "learning_rate": 9.998146822104943e-06, |
| "loss": 0.146, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.9729284048080444, |
| "learning_rate": 9.997074739583162e-06, |
| "loss": 0.1404, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.6401363015174866, |
| "learning_rate": 9.995759106688394e-06, |
| "loss": 0.1354, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 1.005709171295166, |
| "learning_rate": 9.99419998754273e-06, |
| "loss": 0.1421, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 0.7156584858894348, |
| "learning_rate": 9.992397458135438e-06, |
| "loss": 0.144, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 0.9248397946357727, |
| "learning_rate": 9.990351606319261e-06, |
| "loss": 0.1362, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.142520785331726, |
| "learning_rate": 9.988062531806127e-06, |
| "loss": 0.1333, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 0.9766596555709839, |
| "learning_rate": 9.9855303461623e-06, |
| "loss": 0.1329, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 0.8085676431655884, |
| "learning_rate": 9.982755172802933e-06, |
| "loss": 0.1343, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 1.0947911739349365, |
| "learning_rate": 9.979737146986064e-06, |
| "loss": 0.1312, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.9679737091064453, |
| "learning_rate": 9.976476415806013e-06, |
| "loss": 0.1391, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.8831957578659058, |
| "learning_rate": 9.972973138186217e-06, |
| "loss": 0.14, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 1.0070087909698486, |
| "learning_rate": 9.969227484871485e-06, |
| "loss": 0.1343, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 0.8003564476966858, |
| "learning_rate": 9.965239638419673e-06, |
| "loss": 0.1372, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.680647611618042, |
| "learning_rate": 9.961009793192793e-06, |
| "loss": 0.1395, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 1.1533617973327637, |
| "learning_rate": 9.956538155347534e-06, |
| "loss": 0.1398, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.7727952003479004, |
| "learning_rate": 9.951824942825215e-06, |
| "loss": 0.1331, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 0.8858327269554138, |
| "learning_rate": 9.946870385341167e-06, |
| "loss": 0.1282, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.917654812335968, |
| "learning_rate": 9.94167472437353e-06, |
| "loss": 0.1411, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 1.409077763557434, |
| "learning_rate": 9.936238213151491e-06, |
| "loss": 0.1263, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 0.8429045081138611, |
| "learning_rate": 9.930561116642936e-06, |
| "loss": 0.1339, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.2110062837600708, |
| "learning_rate": 9.92464371154154e-06, |
| "loss": 0.1302, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 1.842542052268982, |
| "learning_rate": 9.918486286253279e-06, |
| "loss": 0.1335, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 0.8756665587425232, |
| "learning_rate": 9.912089140882377e-06, |
| "loss": 0.1315, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 0.587119996547699, |
| "learning_rate": 9.90545258721667e-06, |
| "loss": 0.1294, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 1.0943330526351929, |
| "learning_rate": 9.898576948712427e-06, |
| "loss": 0.134, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5449488759040833, |
| "learning_rate": 9.891462560478562e-06, |
| "loss": 0.1273, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 1.0867772102355957, |
| "learning_rate": 9.884109769260326e-06, |
| "loss": 0.136, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 0.6655702590942383, |
| "learning_rate": 9.876518933422385e-06, |
| "loss": 0.1345, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 0.9336721897125244, |
| "learning_rate": 9.868690422931372e-06, |
| "loss": 0.1377, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.5859615206718445, |
| "learning_rate": 9.860624619337844e-06, |
| "loss": 0.131, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.1051822900772095, |
| "learning_rate": 9.852321915757688e-06, |
| "loss": 0.1359, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 0.5312075614929199, |
| "learning_rate": 9.843782716852963e-06, |
| "loss": 0.1364, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 0.5661536455154419, |
| "learning_rate": 9.835007438812177e-06, |
| "loss": 0.134, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.4673188626766205, |
| "learning_rate": 9.825996509330001e-06, |
| "loss": 0.1311, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 0.7434860467910767, |
| "learning_rate": 9.816750367586424e-06, |
| "loss": 0.1333, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.071921467781067, |
| "learning_rate": 9.807269464225355e-06, |
| "loss": 0.1293, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 0.7527667284011841, |
| "learning_rate": 9.797554261332644e-06, |
| "loss": 0.1291, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.7582711577415466, |
| "learning_rate": 9.787605232413575e-06, |
| "loss": 0.1314, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 0.7601514458656311, |
| "learning_rate": 9.777422862369782e-06, |
| "loss": 0.1428, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 1.272291898727417, |
| "learning_rate": 9.767007647475618e-06, |
| "loss": 0.1241, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.4984325170516968, |
| "learning_rate": 9.756360095353957e-06, |
| "loss": 0.1293, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.3648125231266022, |
| "learning_rate": 9.745480724951473e-06, |
| "loss": 0.1278, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 0.5616574287414551, |
| "learning_rate": 9.73437006651333e-06, |
| "loss": 0.1327, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 0.4720121920108795, |
| "learning_rate": 9.723028661557345e-06, |
| "loss": 0.1258, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 0.4209226071834564, |
| "learning_rate": 9.711457062847596e-06, |
| "loss": 0.1272, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 18.944120407104492, |
| "learning_rate": 9.699655834367479e-06, |
| "loss": 0.1307, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 0.4856315851211548, |
| "learning_rate": 9.687625551292219e-06, |
| "loss": 0.1322, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 0.5057471990585327, |
| "learning_rate": 9.675366799960842e-06, |
| "loss": 0.1271, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 0.41341617703437805, |
| "learning_rate": 9.662880177847595e-06, |
| "loss": 0.1313, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 1.8846428394317627, |
| "learning_rate": 9.650166293532822e-06, |
| "loss": 0.1248, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.40669843554496765, |
| "learning_rate": 9.637225766673309e-06, |
| "loss": 0.1219, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 0.7833669781684875, |
| "learning_rate": 9.624059227972077e-06, |
| "loss": 0.1326, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 1.194365382194519, |
| "learning_rate": 9.610667319147648e-06, |
| "loss": 0.1298, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.47751522064208984, |
| "learning_rate": 9.597050692902765e-06, |
| "loss": 0.1254, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 0.7141849398612976, |
| "learning_rate": 9.583210012892582e-06, |
| "loss": 0.1332, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.166374921798706, |
| "learning_rate": 9.569145953692316e-06, |
| "loss": 0.1205, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 0.33305805921554565, |
| "learning_rate": 9.554859200764371e-06, |
| "loss": 0.1242, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.3989585340023041, |
| "learning_rate": 9.540350450424927e-06, |
| "loss": 0.1253, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 0.503139853477478, |
| "learning_rate": 9.525620409810009e-06, |
| "loss": 0.1226, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 0.4812481105327606, |
| "learning_rate": 9.510669796841014e-06, |
| "loss": 0.1222, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.9657276272773743, |
| "learning_rate": 9.495499340189729e-06, |
| "loss": 0.1256, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.4245375096797943, |
| "learning_rate": 9.480109779242805e-06, |
| "loss": 0.1333, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 0.694359302520752, |
| "learning_rate": 9.464501864065735e-06, |
| "loss": 0.123, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 0.5880414843559265, |
| "learning_rate": 9.448676355366282e-06, |
| "loss": 0.1263, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 0.6939809322357178, |
| "learning_rate": 9.432634024457414e-06, |
| "loss": 0.1211, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.87459796667099, |
| "learning_rate": 9.41637565321971e-06, |
| "loss": 0.1176, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 1.157516598701477, |
| "learning_rate": 9.399902034063244e-06, |
| "loss": 0.1196, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 0.602496862411499, |
| "learning_rate": 9.383213969888972e-06, |
| "loss": 0.1291, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 0.8280712366104126, |
| "learning_rate": 9.366312274049602e-06, |
| "loss": 0.1154, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.6725120544433594, |
| "learning_rate": 9.349197770309942e-06, |
| "loss": 0.1214, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.46815815567970276, |
| "learning_rate": 9.33187129280676e-06, |
| "loss": 0.123, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 0.4778998792171478, |
| "learning_rate": 9.314333686008125e-06, |
| "loss": 0.1268, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 0.6865217685699463, |
| "learning_rate": 9.296585804672253e-06, |
| "loss": 0.1226, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.408330500125885, |
| "learning_rate": 9.278628513805838e-06, |
| "loss": 0.1173, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 0.4890899956226349, |
| "learning_rate": 9.260462688621906e-06, |
| "loss": 0.1242, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.4487694501876831, |
| "learning_rate": 9.242089214497146e-06, |
| "loss": 0.1199, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 0.5977551937103271, |
| "learning_rate": 9.223508986928766e-06, |
| "loss": 0.119, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.5191870331764221, |
| "learning_rate": 9.204722911490847e-06, |
| "loss": 0.1209, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 0.427013635635376, |
| "learning_rate": 9.1857319037902e-06, |
| "loss": 0.1208, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 0.7320643663406372, |
| "learning_rate": 9.16653688942175e-06, |
| "loss": 0.13, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.5645836591720581, |
| "learning_rate": 9.147138803923417e-06, |
| "loss": 0.126, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.6136955618858337, |
| "learning_rate": 9.12753859273052e-06, |
| "loss": 0.1193, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 0.884530782699585, |
| "learning_rate": 9.107737211129702e-06, |
| "loss": 0.119, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 0.5332031846046448, |
| "learning_rate": 9.087735624212365e-06, |
| "loss": 0.1256, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 0.4672992527484894, |
| "learning_rate": 9.06753480682764e-06, |
| "loss": 0.1284, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.3032495379447937, |
| "learning_rate": 9.047135743534866e-06, |
| "loss": 0.1237, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 0.2789173126220703, |
| "learning_rate": 9.026539428555609e-06, |
| "loss": 0.1228, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 0.4365665316581726, |
| "learning_rate": 9.005746865725206e-06, |
| "loss": 0.1195, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 0.4817768633365631, |
| "learning_rate": 8.984759068443832e-06, |
| "loss": 0.1268, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.5241479873657227, |
| "learning_rate": 8.963577059627117e-06, |
| "loss": 0.1227, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.40581586956977844, |
| "learning_rate": 8.942201871656292e-06, |
| "loss": 0.1251, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 0.3088854253292084, |
| "learning_rate": 8.920634546327857e-06, |
| "loss": 0.1194, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 0.40653863549232483, |
| "learning_rate": 8.898876134802827e-06, |
| "loss": 0.1209, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.3182103633880615, |
| "learning_rate": 8.87692769755548e-06, |
| "loss": 0.1222, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 1.059717059135437, |
| "learning_rate": 8.854790304321682e-06, |
| "loss": 0.1229, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.4621742069721222, |
| "learning_rate": 8.83246503404675e-06, |
| "loss": 0.1279, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 0.3916736841201782, |
| "learning_rate": 8.80995297483286e-06, |
| "loss": 0.1233, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.32610222697257996, |
| "learning_rate": 8.78725522388602e-06, |
| "loss": 0.1193, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 0.5615723133087158, |
| "learning_rate": 8.764372887462587e-06, |
| "loss": 0.1235, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 0.5475795865058899, |
| "learning_rate": 8.741307080815357e-06, |
| "loss": 0.1207, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.30787885189056396, |
| "learning_rate": 8.718058928139205e-06, |
| "loss": 0.1206, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.8395270705223083, |
| "learning_rate": 8.694629562516295e-06, |
| "loss": 0.1262, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 0.4411715567111969, |
| "learning_rate": 8.671020125860851e-06, |
| "loss": 0.1126, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 0.3818731904029846, |
| "learning_rate": 8.647231768863513e-06, |
| "loss": 0.1184, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 1.530585527420044, |
| "learning_rate": 8.623265650935233e-06, |
| "loss": 0.1197, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.40408292412757874, |
| "learning_rate": 8.599122940150795e-06, |
| "loss": 0.1208, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 0.6378718614578247, |
| "learning_rate": 8.574804813191859e-06, |
| "loss": 0.1164, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 0.4105075001716614, |
| "learning_rate": 8.550312455289624e-06, |
| "loss": 0.1213, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 0.4447656273841858, |
| "learning_rate": 8.525647060167063e-06, |
| "loss": 0.125, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.4272136986255646, |
| "learning_rate": 8.500809829980734e-06, |
| "loss": 0.1152, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.5936419367790222, |
| "learning_rate": 8.4758019752622e-06, |
| "loss": 0.1122, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 0.5025408864021301, |
| "learning_rate": 8.450624714859016e-06, |
| "loss": 0.1212, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 1.1935460567474365, |
| "learning_rate": 8.425279275875336e-06, |
| "loss": 0.1183, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.33655646443367004, |
| "learning_rate": 8.399766893612096e-06, |
| "loss": 0.1172, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 0.44501084089279175, |
| "learning_rate": 8.374088811506819e-06, |
| "loss": 0.1222, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.42191627621650696, |
| "learning_rate": 8.348246281072998e-06, |
| "loss": 0.1175, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 0.43164587020874023, |
| "learning_rate": 8.32224056183911e-06, |
| "loss": 0.1157, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.5635483860969543, |
| "learning_rate": 8.296072921287217e-06, |
| "loss": 0.1199, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 0.5875423550605774, |
| "learning_rate": 8.269744634791207e-06, |
| "loss": 0.1189, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 0.6257967948913574, |
| "learning_rate": 8.243256985554622e-06, |
| "loss": 0.12, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5064549446105957, |
| "learning_rate": 8.21661126454811e-06, |
| "loss": 0.1205, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.4206721782684326, |
| "learning_rate": 8.189808770446528e-06, |
| "loss": 0.1195, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 1.636749029159546, |
| "learning_rate": 8.162850809565623e-06, |
| "loss": 0.1135, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 0.6703956723213196, |
| "learning_rate": 8.135738695798377e-06, |
| "loss": 0.1216, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 0.7827121615409851, |
| "learning_rate": 8.108473750550965e-06, |
| "loss": 0.123, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.3569027781486511, |
| "learning_rate": 8.081057302678352e-06, |
| "loss": 0.1101, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 0.36223459243774414, |
| "learning_rate": 8.053490688419532e-06, |
| "loss": 0.1222, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 0.40967607498168945, |
| "learning_rate": 8.02577525133239e-06, |
| "loss": 0.123, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 1.797508955001831, |
| "learning_rate": 7.997912342228232e-06, |
| "loss": 0.1146, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.3894302546977997, |
| "learning_rate": 7.969903319105935e-06, |
| "loss": 0.117, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.47636857628822327, |
| "learning_rate": 7.941749547085778e-06, |
| "loss": 0.1175, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 5.96767520904541, |
| "learning_rate": 7.913452398342882e-06, |
| "loss": 0.1151, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 0.8992507457733154, |
| "learning_rate": 7.88501325204036e-06, |
| "loss": 0.1208, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.37870603799819946, |
| "learning_rate": 7.856433494262078e-06, |
| "loss": 0.119, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 0.3793867826461792, |
| "learning_rate": 7.827714517945116e-06, |
| "loss": 0.1176, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5373750925064087, |
| "learning_rate": 7.798857722811857e-06, |
| "loss": 0.1228, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 0.5349807143211365, |
| "learning_rate": 7.769864515301787e-06, |
| "loss": 0.1245, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.5244778394699097, |
| "learning_rate": 7.740736308502939e-06, |
| "loss": 0.1206, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 0.9785746932029724, |
| "learning_rate": 7.711474522083015e-06, |
| "loss": 0.1189, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 0.7063882946968079, |
| "learning_rate": 7.682080582220206e-06, |
| "loss": 0.1182, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 3.1905932426452637, |
| "learning_rate": 7.652555921533671e-06, |
| "loss": 0.114, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.5251575112342834, |
| "learning_rate": 7.622901979013717e-06, |
| "loss": 0.1167, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 0.6657569408416748, |
| "learning_rate": 7.5931201999516715e-06, |
| "loss": 0.1182, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 0.3866606056690216, |
| "learning_rate": 7.563212035869426e-06, |
| "loss": 0.1242, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 0.4084101915359497, |
| "learning_rate": 7.533178944448705e-06, |
| "loss": 0.1165, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.4045194983482361, |
| "learning_rate": 7.503022389460014e-06, |
| "loss": 0.1224, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 0.6457034945487976, |
| "learning_rate": 7.4727438406912986e-06, |
| "loss": 0.1187, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 0.5980866551399231, |
| "learning_rate": 7.44234477387631e-06, |
| "loss": 0.1179, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 0.3560180962085724, |
| "learning_rate": 7.411826670622676e-06, |
| "loss": 0.1134, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.4002629816532135, |
| "learning_rate": 7.381191018339697e-06, |
| "loss": 0.1175, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.36518630385398865, |
| "learning_rate": 7.350439310165842e-06, |
| "loss": 0.1102, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 1.9818588495254517, |
| "learning_rate": 7.319573044895986e-06, |
| "loss": 0.1151, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 0.4517176151275635, |
| "learning_rate": 7.288593726908351e-06, |
| "loss": 0.1207, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.40875038504600525, |
| "learning_rate": 7.257502866091192e-06, |
| "loss": 0.113, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 0.6046447157859802, |
| "learning_rate": 7.226301977769199e-06, |
| "loss": 0.1169, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.6228557825088501, |
| "learning_rate": 7.194992582629654e-06, |
| "loss": 0.1151, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 0.4920182526111603, |
| "learning_rate": 7.1635762066483035e-06, |
| "loss": 0.1202, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.3749644458293915, |
| "learning_rate": 7.1320543810149945e-06, |
| "loss": 0.1082, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 0.5646130442619324, |
| "learning_rate": 7.100428642059033e-06, |
| "loss": 0.1173, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 0.6624546051025391, |
| "learning_rate": 7.0687005311743195e-06, |
| "loss": 0.1159, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.4650940895080566, |
| "learning_rate": 7.036871594744218e-06, |
| "loss": 0.1237, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.35690367221832275, |
| "learning_rate": 7.0049433840661875e-06, |
| "loss": 0.1158, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 0.4230184555053711, |
| "learning_rate": 6.97291745527617e-06, |
| "loss": 0.1148, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 0.44549325108528137, |
| "learning_rate": 6.940795369272754e-06, |
| "loss": 0.1237, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 0.29287219047546387, |
| "learning_rate": 6.908578691641092e-06, |
| "loss": 0.1134, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.41562554240226746, |
| "learning_rate": 6.876268992576605e-06, |
| "loss": 0.1128, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 0.35246309638023376, |
| "learning_rate": 6.843867846808438e-06, |
| "loss": 0.1141, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 2.075042247772217, |
| "learning_rate": 6.811376833522729e-06, |
| "loss": 0.1147, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 1.0448739528656006, |
| "learning_rate": 6.778797536285625e-06, |
| "loss": 0.1135, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.6575298309326172, |
| "learning_rate": 6.746131542966112e-06, |
| "loss": 0.1169, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.2195367813110352, |
| "learning_rate": 6.713380445658618e-06, |
| "loss": 0.1133, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 1.0849454402923584, |
| "learning_rate": 6.680545840605423e-06, |
| "loss": 0.1159, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 0.557558000087738, |
| "learning_rate": 6.647629328118852e-06, |
| "loss": 0.1189, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.4906410276889801, |
| "learning_rate": 6.614632512503289e-06, |
| "loss": 0.1118, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 0.5431510210037231, |
| "learning_rate": 6.58155700197697e-06, |
| "loss": 0.1154, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.4932880401611328, |
| "learning_rate": 6.548404408593622e-06, |
| "loss": 0.1154, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 0.9299081563949585, |
| "learning_rate": 6.5151763481638705e-06, |
| "loss": 0.1149, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.39536532759666443, |
| "learning_rate": 6.481874440176506e-06, |
| "loss": 0.1208, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 0.3463858366012573, |
| "learning_rate": 6.448500307719537e-06, |
| "loss": 0.1139, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 0.3795383870601654, |
| "learning_rate": 6.415055577401101e-06, |
| "loss": 0.1119, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.30274516344070435, |
| "learning_rate": 6.3815418792701686e-06, |
| "loss": 0.1103, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.7076600193977356, |
| "learning_rate": 6.3479608467371055e-06, |
| "loss": 0.1137, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 0.6139280796051025, |
| "learning_rate": 6.314314116494061e-06, |
| "loss": 0.1139, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 1.3072528839111328, |
| "learning_rate": 6.280603328435199e-06, |
| "loss": 0.1074, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 0.7616708874702454, |
| "learning_rate": 6.24683012557677e-06, |
| "loss": 0.1164, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5742841958999634, |
| "learning_rate": 6.212996153977038e-06, |
| "loss": 0.12, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 0.3245879113674164, |
| "learning_rate": 6.179103062656042e-06, |
| "loss": 0.1081, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 0.46111881732940674, |
| "learning_rate": 6.145152503515239e-06, |
| "loss": 0.1158, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 0.39089033007621765, |
| "learning_rate": 6.111146131256983e-06, |
| "loss": 0.1238, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.4809490144252777, |
| "learning_rate": 6.077085603303883e-06, |
| "loss": 0.1101, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.3588216006755829, |
| "learning_rate": 6.04297257971802e-06, |
| "loss": 0.1118, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 0.2877720892429352, |
| "learning_rate": 6.008808723120035e-06, |
| "loss": 0.1158, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 0.7675597667694092, |
| "learning_rate": 5.974595698608103e-06, |
| "loss": 0.1119, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.9885912537574768, |
| "learning_rate": 5.94033517367677e-06, |
| "loss": 0.1119, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 0.6791333556175232, |
| "learning_rate": 5.906028818135687e-06, |
| "loss": 0.1165, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.42410117387771606, |
| "learning_rate": 5.871678304028224e-06, |
| "loss": 0.1027, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.502, |
| "grad_norm": 0.4329153001308441, |
| "learning_rate": 5.837285305549978e-06, |
| "loss": 0.1091, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.6316914558410645, |
| "learning_rate": 5.802851498967173e-06, |
| "loss": 0.1144, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.506, |
| "grad_norm": 0.29134896397590637, |
| "learning_rate": 5.768378562534962e-06, |
| "loss": 0.1179, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.508, |
| "grad_norm": 0.4128161668777466, |
| "learning_rate": 5.733868176415633e-06, |
| "loss": 0.1125, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.44271254539489746, |
| "learning_rate": 5.6993220225967214e-06, |
| "loss": 0.1081, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.5136324763298035, |
| "learning_rate": 5.6647417848090225e-06, |
| "loss": 0.1115, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.514, |
| "grad_norm": 0.5882424712181091, |
| "learning_rate": 5.630129148444543e-06, |
| "loss": 0.1086, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.516, |
| "grad_norm": 0.4773464798927307, |
| "learning_rate": 5.59548580047435e-06, |
| "loss": 0.1077, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.518, |
| "grad_norm": 0.48995503783226013, |
| "learning_rate": 5.560813429366345e-06, |
| "loss": 0.1098, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.9368364810943604, |
| "learning_rate": 5.526113725002984e-06, |
| "loss": 0.1173, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.522, |
| "grad_norm": 0.4998178780078888, |
| "learning_rate": 5.491388378598899e-06, |
| "loss": 0.1115, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.524, |
| "grad_norm": 0.3727714717388153, |
| "learning_rate": 5.456639082618489e-06, |
| "loss": 0.1047, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.526, |
| "grad_norm": 0.5195613503456116, |
| "learning_rate": 5.4218675306934145e-06, |
| "loss": 0.1087, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 1.1314338445663452, |
| "learning_rate": 5.3870754175400595e-06, |
| "loss": 0.1127, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.6690823435783386, |
| "learning_rate": 5.352264438876935e-06, |
| "loss": 0.1098, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.532, |
| "grad_norm": 0.3548797369003296, |
| "learning_rate": 5.317436291342031e-06, |
| "loss": 0.1099, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.534, |
| "grad_norm": 0.34155887365341187, |
| "learning_rate": 5.282592672410124e-06, |
| "loss": 0.1185, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 0.3449115455150604, |
| "learning_rate": 5.247735280310041e-06, |
| "loss": 0.1075, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.538, |
| "grad_norm": 0.5513066053390503, |
| "learning_rate": 5.212865813941899e-06, |
| "loss": 0.1152, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.496774286031723, |
| "learning_rate": 5.177985972794293e-06, |
| "loss": 0.1053, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.542, |
| "grad_norm": 0.47940221428871155, |
| "learning_rate": 5.143097456861474e-06, |
| "loss": 0.1096, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.26659730076789856, |
| "learning_rate": 5.1082019665604895e-06, |
| "loss": 0.1097, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.546, |
| "grad_norm": 0.359784871339798, |
| "learning_rate": 5.073301202648304e-06, |
| "loss": 0.1004, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.548, |
| "grad_norm": 1.3557486534118652, |
| "learning_rate": 5.038396866138915e-06, |
| "loss": 0.118, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.3325219750404358, |
| "learning_rate": 5.003490658220438e-06, |
| "loss": 0.1116, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.5154880881309509, |
| "learning_rate": 4.968584280172206e-06, |
| "loss": 0.1022, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.554, |
| "grad_norm": 1.5963865518569946, |
| "learning_rate": 4.933679433281837e-06, |
| "loss": 0.1166, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.556, |
| "grad_norm": 1.1274834871292114, |
| "learning_rate": 4.898777818762325e-06, |
| "loss": 0.1136, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.558, |
| "grad_norm": 0.4707079529762268, |
| "learning_rate": 4.863881137669123e-06, |
| "loss": 0.1094, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.31004512310028076, |
| "learning_rate": 4.828991090817238e-06, |
| "loss": 0.1086, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.562, |
| "grad_norm": 0.5906158685684204, |
| "learning_rate": 4.794109378698327e-06, |
| "loss": 0.121, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.564, |
| "grad_norm": 0.44631025195121765, |
| "learning_rate": 4.759237701397831e-06, |
| "loss": 0.1148, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.566, |
| "grad_norm": 1.058272361755371, |
| "learning_rate": 4.7243777585121034e-06, |
| "loss": 0.1113, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.7068793773651123, |
| "learning_rate": 4.689531249065581e-06, |
| "loss": 0.1066, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.47186627984046936, |
| "learning_rate": 4.654699871427972e-06, |
| "loss": 0.1177, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.572, |
| "grad_norm": 0.45073261857032776, |
| "learning_rate": 4.619885323231484e-06, |
| "loss": 0.1134, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.574, |
| "grad_norm": 0.5080240964889526, |
| "learning_rate": 4.5850893012880806e-06, |
| "loss": 0.1183, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.48024675250053406, |
| "learning_rate": 4.5503135015067815e-06, |
| "loss": 0.1132, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.578, |
| "grad_norm": 0.4118949770927429, |
| "learning_rate": 4.5155596188110055e-06, |
| "loss": 0.1094, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.4416310489177704, |
| "learning_rate": 4.4808293470559645e-06, |
| "loss": 0.1178, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.582, |
| "grad_norm": 0.3114508092403412, |
| "learning_rate": 4.446124378946108e-06, |
| "loss": 0.1216, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.48362258076667786, |
| "learning_rate": 4.4114464059526185e-06, |
| "loss": 0.0995, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.586, |
| "grad_norm": 0.34811100363731384, |
| "learning_rate": 4.376797118230978e-06, |
| "loss": 0.1089, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.588, |
| "grad_norm": 0.3432498276233673, |
| "learning_rate": 4.342178204538588e-06, |
| "loss": 0.1141, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.7286999225616455, |
| "learning_rate": 4.307591352152459e-06, |
| "loss": 0.1142, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.3824271261692047, |
| "learning_rate": 4.273038246786986e-06, |
| "loss": 0.1146, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.594, |
| "grad_norm": 0.5760470032691956, |
| "learning_rate": 4.238520572511773e-06, |
| "loss": 0.1125, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.596, |
| "grad_norm": 0.29192543029785156, |
| "learning_rate": 4.204040011669567e-06, |
| "loss": 0.1143, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.598, |
| "grad_norm": 0.4159230887889862, |
| "learning_rate": 4.169598244794261e-06, |
| "loss": 0.109, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.27462849020957947, |
| "learning_rate": 4.135196950528982e-06, |
| "loss": 0.1157, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.602, |
| "grad_norm": 0.4015858471393585, |
| "learning_rate": 4.100837805544279e-06, |
| "loss": 0.1019, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.604, |
| "grad_norm": 0.6288099884986877, |
| "learning_rate": 4.066522484456406e-06, |
| "loss": 0.1119, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.606, |
| "grad_norm": 0.7088296413421631, |
| "learning_rate": 4.032252659745699e-06, |
| "loss": 0.1119, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 2.2175350189208984, |
| "learning_rate": 3.9980300016750696e-06, |
| "loss": 0.1135, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.2995910346508026, |
| "learning_rate": 3.963856178208588e-06, |
| "loss": 0.1068, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.612, |
| "grad_norm": 0.4773070514202118, |
| "learning_rate": 3.9297328549302e-06, |
| "loss": 0.1118, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.614, |
| "grad_norm": 0.3807465732097626, |
| "learning_rate": 3.895661694962542e-06, |
| "loss": 0.1115, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.5447002649307251, |
| "learning_rate": 3.86164435888588e-06, |
| "loss": 0.1148, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.618, |
| "grad_norm": 0.6512142419815063, |
| "learning_rate": 3.827682504657187e-06, |
| "loss": 0.1143, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.4874299168586731, |
| "learning_rate": 3.793777787529325e-06, |
| "loss": 0.1151, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.622, |
| "grad_norm": 0.4157492220401764, |
| "learning_rate": 3.759931859970374e-06, |
| "loss": 0.1108, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.4890842139720917, |
| "learning_rate": 3.7261463715830902e-06, |
| "loss": 0.1114, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.626, |
| "grad_norm": 0.3871977627277374, |
| "learning_rate": 3.6924229690245163e-06, |
| "loss": 0.1112, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.628, |
| "grad_norm": 0.3650118410587311, |
| "learning_rate": 3.6587632959257168e-06, |
| "loss": 0.1051, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.2773863971233368, |
| "learning_rate": 3.625168992811671e-06, |
| "loss": 0.1125, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.3802359402179718, |
| "learning_rate": 3.5916416970213173e-06, |
| "loss": 0.1073, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.634, |
| "grad_norm": 0.5529680252075195, |
| "learning_rate": 3.5581830426277554e-06, |
| "loss": 0.1132, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.636, |
| "grad_norm": 0.33904126286506653, |
| "learning_rate": 3.524794660358593e-06, |
| "loss": 0.1107, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.638, |
| "grad_norm": 0.7427698373794556, |
| "learning_rate": 3.491478177516484e-06, |
| "loss": 0.1086, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.3335733711719513, |
| "learning_rate": 3.4582352178997937e-06, |
| "loss": 0.1104, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.642, |
| "grad_norm": 0.4681756794452667, |
| "learning_rate": 3.4250674017234774e-06, |
| "loss": 0.117, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.644, |
| "grad_norm": 0.7870750427246094, |
| "learning_rate": 3.3919763455401016e-06, |
| "loss": 0.1141, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.646, |
| "grad_norm": 0.4855883717536926, |
| "learning_rate": 3.358963662161062e-06, |
| "loss": 0.1049, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 1.0050535202026367, |
| "learning_rate": 3.3260309605779717e-06, |
| "loss": 0.1097, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.3933185935020447, |
| "learning_rate": 3.293179845884245e-06, |
| "loss": 0.1152, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.652, |
| "grad_norm": 0.38565793633461, |
| "learning_rate": 3.260411919196866e-06, |
| "loss": 0.1094, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.654, |
| "grad_norm": 0.4234326481819153, |
| "learning_rate": 3.227728777578353e-06, |
| "loss": 0.1168, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.5142901539802551, |
| "learning_rate": 3.195132013958918e-06, |
| "loss": 0.1114, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.658, |
| "grad_norm": 0.3599749505519867, |
| "learning_rate": 3.1626232170588343e-06, |
| "loss": 0.1078, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.3898942172527313, |
| "learning_rate": 3.130203971310999e-06, |
| "loss": 0.113, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.662, |
| "grad_norm": 1.5486271381378174, |
| "learning_rate": 3.097875856783713e-06, |
| "loss": 0.109, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 1.6251258850097656, |
| "learning_rate": 3.0656404491036696e-06, |
| "loss": 0.116, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.666, |
| "grad_norm": 3.3787240982055664, |
| "learning_rate": 3.033499319379163e-06, |
| "loss": 0.1083, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.668, |
| "grad_norm": 0.4697399437427521, |
| "learning_rate": 3.001454034123512e-06, |
| "loss": 0.117, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.35250768065452576, |
| "learning_rate": 2.969506155178711e-06, |
| "loss": 0.1091, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.7035725116729736, |
| "learning_rate": 2.9376572396393047e-06, |
| "loss": 0.111, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.674, |
| "grad_norm": 0.4109930992126465, |
| "learning_rate": 2.905908839776509e-06, |
| "loss": 0.1106, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.676, |
| "grad_norm": 0.43214330077171326, |
| "learning_rate": 2.874262502962537e-06, |
| "loss": 0.1072, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.678, |
| "grad_norm": 0.3473849296569824, |
| "learning_rate": 2.8427197715952047e-06, |
| "loss": 0.1158, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.40624290704727173, |
| "learning_rate": 2.811282183022736e-06, |
| "loss": 0.1139, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.682, |
| "grad_norm": 3.3285329341888428, |
| "learning_rate": 2.779951269468847e-06, |
| "loss": 0.1162, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.684, |
| "grad_norm": 0.624461829662323, |
| "learning_rate": 2.7487285579580635e-06, |
| "loss": 0.1084, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.686, |
| "grad_norm": 1.3213962316513062, |
| "learning_rate": 2.717615570241294e-06, |
| "loss": 0.1098, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.3557455539703369, |
| "learning_rate": 2.686613822721666e-06, |
| "loss": 0.113, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.4707227051258087, |
| "learning_rate": 2.6557248263806175e-06, |
| "loss": 0.1135, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.692, |
| "grad_norm": 0.5622274875640869, |
| "learning_rate": 2.6249500867042523e-06, |
| "loss": 0.1224, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.694, |
| "grad_norm": 0.43661999702453613, |
| "learning_rate": 2.5942911036099657e-06, |
| "loss": 0.1178, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.8577882051467896, |
| "learning_rate": 2.5637493713733376e-06, |
| "loss": 0.1101, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.698, |
| "grad_norm": 0.5041825771331787, |
| "learning_rate": 2.533326378555314e-06, |
| "loss": 0.1115, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.3203236758708954, |
| "learning_rate": 2.5030236079296443e-06, |
| "loss": 0.1093, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.702, |
| "grad_norm": 0.37466859817504883, |
| "learning_rate": 2.4728425364106136e-06, |
| "loss": 0.1063, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.7311847805976868, |
| "learning_rate": 2.442784634981071e-06, |
| "loss": 0.1092, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.706, |
| "grad_norm": 0.7368522882461548, |
| "learning_rate": 2.412851368620726e-06, |
| "loss": 0.1094, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.708, |
| "grad_norm": 1.2340730428695679, |
| "learning_rate": 2.3830441962347528e-06, |
| "loss": 0.1141, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.4553486406803131, |
| "learning_rate": 2.353364570582681e-06, |
| "loss": 0.1092, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 0.49003028869628906, |
| "learning_rate": 2.323813938207593e-06, |
| "loss": 0.1071, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.714, |
| "grad_norm": 0.8033204674720764, |
| "learning_rate": 2.294393739365621e-06, |
| "loss": 0.1159, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.716, |
| "grad_norm": 0.3968663513660431, |
| "learning_rate": 2.265105407955752e-06, |
| "loss": 0.1162, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.718, |
| "grad_norm": 0.567513108253479, |
| "learning_rate": 2.235950371449938e-06, |
| "loss": 0.1125, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.4921203851699829, |
| "learning_rate": 2.2069300508235273e-06, |
| "loss": 0.1131, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.722, |
| "grad_norm": 2.202014684677124, |
| "learning_rate": 2.1780458604860056e-06, |
| "loss": 0.1093, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.724, |
| "grad_norm": 0.5519041419029236, |
| "learning_rate": 2.14929920821206e-06, |
| "loss": 0.1158, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.726, |
| "grad_norm": 0.41222497820854187, |
| "learning_rate": 2.1206914950729673e-06, |
| "loss": 0.1095, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 1.8621835708618164, |
| "learning_rate": 2.0922241153683064e-06, |
| "loss": 0.1148, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.47333869338035583, |
| "learning_rate": 2.063898456558002e-06, |
| "loss": 0.1089, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.732, |
| "grad_norm": 0.37637799978256226, |
| "learning_rate": 2.035715899194704e-06, |
| "loss": 0.106, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.734, |
| "grad_norm": 1.6549164056777954, |
| "learning_rate": 2.007677816856498e-06, |
| "loss": 0.104, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.3913106918334961, |
| "learning_rate": 1.979785576079961e-06, |
| "loss": 0.1041, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.738, |
| "grad_norm": 0.498811811208725, |
| "learning_rate": 1.95204053629356e-06, |
| "loss": 0.1137, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.3852579593658447, |
| "learning_rate": 1.9244440497513895e-06, |
| "loss": 0.1018, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.742, |
| "grad_norm": 0.5074270963668823, |
| "learning_rate": 1.896997461467272e-06, |
| "loss": 0.1101, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.6303516626358032, |
| "learning_rate": 1.8697021091491991e-06, |
| "loss": 0.1088, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.746, |
| "grad_norm": 0.8138649463653564, |
| "learning_rate": 1.842559323134136e-06, |
| "loss": 0.1072, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.748, |
| "grad_norm": 0.4117524027824402, |
| "learning_rate": 1.8155704263231777e-06, |
| "loss": 0.1024, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.3261663913726807, |
| "learning_rate": 1.7887367341170781e-06, |
| "loss": 0.1093, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.4887453317642212, |
| "learning_rate": 1.762059554352143e-06, |
| "loss": 0.1145, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.754, |
| "grad_norm": 0.7797528505325317, |
| "learning_rate": 1.7355401872364759e-06, |
| "loss": 0.1124, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.756, |
| "grad_norm": 0.5533345937728882, |
| "learning_rate": 1.709179925286617e-06, |
| "loss": 0.1132, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.758, |
| "grad_norm": 0.8638681173324585, |
| "learning_rate": 1.6829800532645447e-06, |
| "loss": 0.1168, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.38189423084259033, |
| "learning_rate": 1.6569418481150596e-06, |
| "loss": 0.1083, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.762, |
| "grad_norm": 0.5026066303253174, |
| "learning_rate": 1.6310665789035468e-06, |
| "loss": 0.109, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.764, |
| "grad_norm": 0.5332008004188538, |
| "learning_rate": 1.605355506754121e-06, |
| "loss": 0.1096, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.766, |
| "grad_norm": 0.5544666051864624, |
| "learning_rate": 1.5798098847881664e-06, |
| "loss": 0.1038, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.5581374764442444, |
| "learning_rate": 1.554430958063259e-06, |
| "loss": 0.1069, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.4732300341129303, |
| "learning_rate": 1.529219963512481e-06, |
| "loss": 0.1097, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.772, |
| "grad_norm": 0.46628376841545105, |
| "learning_rate": 1.5041781298841424e-06, |
| "loss": 0.1193, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.774, |
| "grad_norm": 0.4595516622066498, |
| "learning_rate": 1.4793066776818843e-06, |
| "loss": 0.1128, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.9645623564720154, |
| "learning_rate": 1.4546068191051988e-06, |
| "loss": 0.1119, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.778, |
| "grad_norm": 0.5310599207878113, |
| "learning_rate": 1.4300797579903476e-06, |
| "loss": 0.1086, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.710594892501831, |
| "learning_rate": 1.4057266897516842e-06, |
| "loss": 0.1183, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.782, |
| "grad_norm": 0.37046968936920166, |
| "learning_rate": 1.3815488013233986e-06, |
| "loss": 0.1056, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.3346898555755615, |
| "learning_rate": 1.3575472711016634e-06, |
| "loss": 0.106, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.786, |
| "grad_norm": 0.34566453099250793, |
| "learning_rate": 1.333723268887201e-06, |
| "loss": 0.104, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.788, |
| "grad_norm": 0.696998655796051, |
| "learning_rate": 1.3100779558282673e-06, |
| "loss": 0.1046, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.35373494029045105, |
| "learning_rate": 1.2866124843640614e-06, |
| "loss": 0.1091, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.5243257284164429, |
| "learning_rate": 1.2633279981685608e-06, |
| "loss": 0.1152, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.794, |
| "grad_norm": 0.4453822672367096, |
| "learning_rate": 1.240225632094773e-06, |
| "loss": 0.1098, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.796, |
| "grad_norm": 0.566088080406189, |
| "learning_rate": 1.217306512119425e-06, |
| "loss": 0.1168, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.798, |
| "grad_norm": 0.7452012896537781, |
| "learning_rate": 1.1945717552880919e-06, |
| "loss": 0.1085, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.4251628816127777, |
| "learning_rate": 1.1720224696607474e-06, |
| "loss": 0.1076, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.802, |
| "grad_norm": 0.6231107711791992, |
| "learning_rate": 1.1496597542577603e-06, |
| "loss": 0.1077, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.804, |
| "grad_norm": 0.6129289865493774, |
| "learning_rate": 1.1274846990063314e-06, |
| "loss": 0.1075, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.806, |
| "grad_norm": 0.35106995701789856, |
| "learning_rate": 1.1054983846873684e-06, |
| "loss": 0.105, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.4739697575569153, |
| "learning_rate": 1.0837018828828133e-06, |
| "loss": 0.1058, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.47443220019340515, |
| "learning_rate": 1.0620962559234144e-06, |
| "loss": 0.1067, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.812, |
| "grad_norm": 0.47327733039855957, |
| "learning_rate": 1.0406825568369478e-06, |
| "loss": 0.1097, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.814, |
| "grad_norm": 0.5550324320793152, |
| "learning_rate": 1.0194618292968972e-06, |
| "loss": 0.1033, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.8000510334968567, |
| "learning_rate": 9.984351075715848e-07, |
| "loss": 0.1049, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.818, |
| "grad_norm": 0.4578244686126709, |
| "learning_rate": 9.77603416473763e-07, |
| "loss": 0.1107, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.5600775480270386, |
| "learning_rate": 9.569677713106673e-07, |
| "loss": 0.1089, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.822, |
| "grad_norm": 0.46126943826675415, |
| "learning_rate": 9.365291778345303e-07, |
| "loss": 0.099, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.555090069770813, |
| "learning_rate": 9.162886321935632e-07, |
| "loss": 0.1035, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.826, |
| "grad_norm": 0.4217548668384552, |
| "learning_rate": 8.962471208834056e-07, |
| "loss": 0.1059, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.828, |
| "grad_norm": 0.7031282186508179, |
| "learning_rate": 8.764056206990446e-07, |
| "loss": 0.1117, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 0.6522052884101868, |
| "learning_rate": 8.567650986872061e-07, |
| "loss": 0.1049, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.5371230840682983, |
| "learning_rate": 8.373265120992252e-07, |
| "loss": 0.1157, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.834, |
| "grad_norm": 0.4255172312259674, |
| "learning_rate": 8.180908083443884e-07, |
| "loss": 0.1052, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.836, |
| "grad_norm": 0.4094915986061096, |
| "learning_rate": 7.990589249437591e-07, |
| "loss": 0.1083, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.838, |
| "grad_norm": 0.32630613446235657, |
| "learning_rate": 7.802317894844835e-07, |
| "loss": 0.1048, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.35342568159103394, |
| "learning_rate": 7.61610319574585e-07, |
| "loss": 0.107, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.842, |
| "grad_norm": 0.4322859048843384, |
| "learning_rate": 7.43195422798233e-07, |
| "loss": 0.0994, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.844, |
| "grad_norm": 0.41975167393684387, |
| "learning_rate": 7.249879966715174e-07, |
| "loss": 0.1166, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.846, |
| "grad_norm": 0.9153004884719849, |
| "learning_rate": 7.069889285987025e-07, |
| "loss": 0.1156, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 2.4857826232910156, |
| "learning_rate": 6.891990958289724e-07, |
| "loss": 0.1029, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.6253427267074585, |
| "learning_rate": 6.716193654136788e-07, |
| "loss": 0.1108, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.852, |
| "grad_norm": 0.9512668251991272, |
| "learning_rate": 6.542505941640803e-07, |
| "loss": 0.1084, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.854, |
| "grad_norm": 0.46003222465515137, |
| "learning_rate": 6.370936286095842e-07, |
| "loss": 0.1046, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.37549811601638794, |
| "learning_rate": 6.201493049564883e-07, |
| "loss": 0.1058, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.858, |
| "grad_norm": 0.554378867149353, |
| "learning_rate": 6.034184490472195e-07, |
| "loss": 0.1142, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.6527359485626221, |
| "learning_rate": 5.869018763200929e-07, |
| "loss": 0.1076, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.862, |
| "grad_norm": 0.403913676738739, |
| "learning_rate": 5.706003917695619e-07, |
| "loss": 0.1083, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.4722561240196228, |
| "learning_rate": 5.545147899069836e-07, |
| "loss": 0.115, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.866, |
| "grad_norm": 0.3714640736579895, |
| "learning_rate": 5.386458547219026e-07, |
| "loss": 0.1059, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.868, |
| "grad_norm": 0.7594704627990723, |
| "learning_rate": 5.229943596438297e-07, |
| "loss": 0.1132, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.4244316518306732, |
| "learning_rate": 5.075610675045567e-07, |
| "loss": 0.1063, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.8828716278076172, |
| "learning_rate": 4.92346730500966e-07, |
| "loss": 0.1034, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.874, |
| "grad_norm": 0.46588724851608276, |
| "learning_rate": 4.773520901583801e-07, |
| "loss": 0.1064, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.876, |
| "grad_norm": 0.9723156094551086, |
| "learning_rate": 4.625778772944156e-07, |
| "loss": 0.1066, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.878, |
| "grad_norm": 0.4076969027519226, |
| "learning_rate": 4.480248119833641e-07, |
| "loss": 0.1114, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.37374594807624817, |
| "learning_rate": 4.33693603521097e-07, |
| "loss": 0.1127, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.882, |
| "grad_norm": 0.5135172605514526, |
| "learning_rate": 4.195849503904975e-07, |
| "loss": 0.1059, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.884, |
| "grad_norm": 0.341399222612381, |
| "learning_rate": 4.056995402274122e-07, |
| "loss": 0.1063, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.886, |
| "grad_norm": 0.6200608015060425, |
| "learning_rate": 3.920380497871473e-07, |
| "loss": 0.1063, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 1.903733730316162, |
| "learning_rate": 3.7860114491147017e-07, |
| "loss": 0.1, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.135728359222412, |
| "learning_rate": 3.6538948049616886e-07, |
| "loss": 0.115, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.892, |
| "grad_norm": 0.3601432740688324, |
| "learning_rate": 3.524037004591274e-07, |
| "loss": 0.1061, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.894, |
| "grad_norm": 0.6803275346755981, |
| "learning_rate": 3.396444377089453e-07, |
| "loss": 0.1156, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.5160225033760071, |
| "learning_rate": 3.271123141140886e-07, |
| "loss": 0.1056, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.898, |
| "grad_norm": 0.4760143458843231, |
| "learning_rate": 3.148079404725801e-07, |
| "loss": 0.103, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.5659618377685547, |
| "learning_rate": 3.027319164822329e-07, |
| "loss": 0.105, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.902, |
| "grad_norm": 0.4368997812271118, |
| "learning_rate": 2.908848307114198e-07, |
| "loss": 0.1093, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.4630069434642792, |
| "learning_rate": 2.792672605703867e-07, |
| "loss": 0.1052, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.906, |
| "grad_norm": 0.6788815855979919, |
| "learning_rate": 2.6787977228311336e-07, |
| "loss": 0.1077, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.908, |
| "grad_norm": 0.42769503593444824, |
| "learning_rate": 2.5672292085971276e-07, |
| "loss": 0.1125, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.4623875617980957, |
| "learning_rate": 2.457972500693834e-07, |
| "loss": 0.1092, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 2.632136821746826, |
| "learning_rate": 2.351032924139063e-07, |
| "loss": 0.1037, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.914, |
| "grad_norm": 0.5099290609359741, |
| "learning_rate": 2.2464156910168954e-07, |
| "loss": 0.1083, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.916, |
| "grad_norm": 0.6304298043251038, |
| "learning_rate": 2.1441259002236924e-07, |
| "loss": 0.1119, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.918, |
| "grad_norm": 0.7745104432106018, |
| "learning_rate": 2.0441685372195487e-07, |
| "loss": 0.1065, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.4200075566768646, |
| "learning_rate": 1.9465484737853092e-07, |
| "loss": 0.1054, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.922, |
| "grad_norm": 0.42068639397621155, |
| "learning_rate": 1.8512704677851489e-07, |
| "loss": 0.1067, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.924, |
| "grad_norm": 2.7736501693725586, |
| "learning_rate": 1.758339162934658e-07, |
| "loss": 0.1045, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.926, |
| "grad_norm": 0.7401975989341736, |
| "learning_rate": 1.6677590885745388e-07, |
| "loss": 0.1101, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 2.9965381622314453, |
| "learning_rate": 1.5795346594498162e-07, |
| "loss": 0.1089, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.3762933015823364, |
| "learning_rate": 1.4936701754947104e-07, |
| "loss": 0.1121, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.932, |
| "grad_norm": 0.6434100866317749, |
| "learning_rate": 1.4101698216230254e-07, |
| "loss": 0.1107, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.934, |
| "grad_norm": 0.4862038195133209, |
| "learning_rate": 1.3290376675242022e-07, |
| "loss": 0.1055, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 1.0907853841781616, |
| "learning_rate": 1.2502776674649776e-07, |
| "loss": 0.1089, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.938, |
| "grad_norm": 1.1990622282028198, |
| "learning_rate": 1.1738936600966366e-07, |
| "loss": 0.1097, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.6390599012374878, |
| "learning_rate": 1.0998893682679479e-07, |
| "loss": 0.1157, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.942, |
| "grad_norm": 0.5979716777801514, |
| "learning_rate": 1.0282683988436792e-07, |
| "loss": 0.1102, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.5007372498512268, |
| "learning_rate": 9.590342425288446e-08, |
| "loss": 0.0998, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.946, |
| "grad_norm": 0.3509086072444916, |
| "learning_rate": 8.921902736985399e-08, |
| "loss": 0.111, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.948, |
| "grad_norm": 0.6750816106796265, |
| "learning_rate": 8.277397502335194e-08, |
| "loss": 0.1031, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.5669856071472168, |
| "learning_rate": 7.656858133613498e-08, |
| "loss": 0.1076, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.5979962944984436, |
| "learning_rate": 7.060314875033836e-08, |
| "loss": 0.1016, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.954, |
| "grad_norm": 1.0432602167129517, |
| "learning_rate": 6.487796801272983e-08, |
| "loss": 0.1137, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.956, |
| "grad_norm": 0.6175380945205688, |
| "learning_rate": 5.939331816054161e-08, |
| "loss": 0.1108, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.958, |
| "grad_norm": 0.4083493947982788, |
| "learning_rate": 5.414946650786957e-08, |
| "loss": 0.1015, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.4828687310218811, |
| "learning_rate": 4.914666863264528e-08, |
| "loss": 0.1038, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.962, |
| "grad_norm": 0.45067399740219116, |
| "learning_rate": 4.438516836417994e-08, |
| "loss": 0.1044, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.964, |
| "grad_norm": 0.6787665486335754, |
| "learning_rate": 3.986519777127884e-08, |
| "loss": 0.1013, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.966, |
| "grad_norm": 0.529229998588562, |
| "learning_rate": 3.558697715093207e-08, |
| "loss": 0.1168, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 0.9952864050865173, |
| "learning_rate": 3.1550715017575895e-08, |
| "loss": 0.1068, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.3543412685394287, |
| "learning_rate": 2.7756608092933678e-08, |
| "loss": 0.1169, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.0002, |
| "grad_norm": 0.6844854950904846, |
| "learning_rate": 2.4204841296424086e-08, |
| "loss": 0.1223, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.0022, |
| "grad_norm": 0.5493595600128174, |
| "learning_rate": 2.0895587736149414e-08, |
| "loss": 0.1135, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.0042, |
| "grad_norm": 0.3710947632789612, |
| "learning_rate": 1.7829008700460116e-08, |
| "loss": 0.1032, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.0062, |
| "grad_norm": 0.3379112482070923, |
| "learning_rate": 1.500525365009109e-08, |
| "loss": 0.1058, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.0082, |
| "grad_norm": 0.4570133686065674, |
| "learning_rate": 1.2424460210881394e-08, |
| "loss": 0.1146, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.0102, |
| "grad_norm": 0.41871699690818787, |
| "learning_rate": 1.008675416706073e-08, |
| "loss": 0.0956, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.0122, |
| "grad_norm": 0.34792661666870117, |
| "learning_rate": 7.992249455124889e-09, |
| "loss": 0.0974, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.0142, |
| "grad_norm": 1.3189023733139038, |
| "learning_rate": 6.141048158277429e-09, |
| "loss": 0.1055, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.0162, |
| "grad_norm": 0.6436313390731812, |
| "learning_rate": 4.533240501459202e-09, |
| "loss": 0.1092, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.0182, |
| "grad_norm": 0.40070950984954834, |
| "learning_rate": 3.1689048469457638e-09, |
| "loss": 0.1065, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.0202, |
| "grad_norm": 0.31388986110687256, |
| "learning_rate": 2.0481076905332074e-09, |
| "loss": 0.1114, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.0222, |
| "grad_norm": 0.47767698764801025, |
| "learning_rate": 1.170903658293532e-09, |
| "loss": 0.1065, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.0242, |
| "grad_norm": 0.8886580467224121, |
| "learning_rate": 5.373355039128836e-10, |
| "loss": 0.1032, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.0262, |
| "grad_norm": 0.49497994780540466, |
| "learning_rate": 1.4743410661044454e-10, |
| "loss": 0.1148, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.0282, |
| "grad_norm": 0.4880732297897339, |
| "learning_rate": 1.2184696296380083e-12, |
| "loss": 0.1156, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6916730815250432.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|