| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 490, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01020408163265306, |
| "grad_norm": 1.5778175592422485, |
| "learning_rate": 0.0, |
| "loss": 0.1624, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02040816326530612, |
| "grad_norm": 1.3363069295883179, |
| "learning_rate": 4.0816326530612243e-07, |
| "loss": 0.1369, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.030612244897959183, |
| "grad_norm": 1.248914361000061, |
| "learning_rate": 8.163265306122449e-07, |
| "loss": 0.1151, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.04081632653061224, |
| "grad_norm": 1.4152894020080566, |
| "learning_rate": 1.2244897959183673e-06, |
| "loss": 0.1031, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.05102040816326531, |
| "grad_norm": 1.2651602029800415, |
| "learning_rate": 1.6326530612244897e-06, |
| "loss": 0.097, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.061224489795918366, |
| "grad_norm": 1.289127230644226, |
| "learning_rate": 2.0408163265306125e-06, |
| "loss": 0.095, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 1.1998013257980347, |
| "learning_rate": 2.4489795918367347e-06, |
| "loss": 0.1236, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.08163265306122448, |
| "grad_norm": 1.538970947265625, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 0.1177, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.09183673469387756, |
| "grad_norm": 1.2269916534423828, |
| "learning_rate": 3.2653061224489794e-06, |
| "loss": 0.0931, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.10204081632653061, |
| "grad_norm": 1.2894030809402466, |
| "learning_rate": 3.6734693877551024e-06, |
| "loss": 0.1049, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.11224489795918367, |
| "grad_norm": 0.9413341283798218, |
| "learning_rate": 4.081632653061225e-06, |
| "loss": 0.0757, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.12244897959183673, |
| "grad_norm": 1.1163856983184814, |
| "learning_rate": 4.489795918367348e-06, |
| "loss": 0.0936, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.1326530612244898, |
| "grad_norm": 0.997565507888794, |
| "learning_rate": 4.897959183673469e-06, |
| "loss": 0.0797, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 1.0046685934066772, |
| "learning_rate": 5.306122448979593e-06, |
| "loss": 0.0855, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.15306122448979592, |
| "grad_norm": 0.9205936789512634, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.079, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.16326530612244897, |
| "grad_norm": 0.8712719678878784, |
| "learning_rate": 6.122448979591837e-06, |
| "loss": 0.0666, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.17346938775510204, |
| "grad_norm": 0.9426755905151367, |
| "learning_rate": 6.530612244897959e-06, |
| "loss": 0.073, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1836734693877551, |
| "grad_norm": 0.8015092611312866, |
| "learning_rate": 6.938775510204082e-06, |
| "loss": 0.0669, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.19387755102040816, |
| "grad_norm": 0.7584081292152405, |
| "learning_rate": 7.346938775510205e-06, |
| "loss": 0.0517, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.20408163265306123, |
| "grad_norm": 0.7984261512756348, |
| "learning_rate": 7.755102040816327e-06, |
| "loss": 0.0667, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.7533179521560669, |
| "learning_rate": 8.16326530612245e-06, |
| "loss": 0.0639, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.22448979591836735, |
| "grad_norm": 0.9156713485717773, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 0.0729, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.23469387755102042, |
| "grad_norm": 0.6727928519248962, |
| "learning_rate": 8.979591836734695e-06, |
| "loss": 0.0565, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.24489795918367346, |
| "grad_norm": 0.5908196568489075, |
| "learning_rate": 9.387755102040818e-06, |
| "loss": 0.0501, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.25510204081632654, |
| "grad_norm": 0.5994157195091248, |
| "learning_rate": 9.795918367346939e-06, |
| "loss": 0.0596, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.2653061224489796, |
| "grad_norm": 0.5699151754379272, |
| "learning_rate": 1.0204081632653063e-05, |
| "loss": 0.0478, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.2755102040816326, |
| "grad_norm": 0.41071373224258423, |
| "learning_rate": 1.0612244897959186e-05, |
| "loss": 0.0306, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.6520228981971741, |
| "learning_rate": 1.1020408163265306e-05, |
| "loss": 0.0509, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.29591836734693877, |
| "grad_norm": 0.5062035918235779, |
| "learning_rate": 1.1428571428571429e-05, |
| "loss": 0.0415, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.30612244897959184, |
| "grad_norm": 0.5349479913711548, |
| "learning_rate": 1.1836734693877552e-05, |
| "loss": 0.0396, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3163265306122449, |
| "grad_norm": 0.29080551862716675, |
| "learning_rate": 1.2244897959183674e-05, |
| "loss": 0.0215, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.32653061224489793, |
| "grad_norm": 0.5437124371528625, |
| "learning_rate": 1.2653061224489798e-05, |
| "loss": 0.0402, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.336734693877551, |
| "grad_norm": 0.7592443823814392, |
| "learning_rate": 1.3061224489795918e-05, |
| "loss": 0.0692, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.3469387755102041, |
| "grad_norm": 0.6122593283653259, |
| "learning_rate": 1.3469387755102042e-05, |
| "loss": 0.0602, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.31229618191719055, |
| "learning_rate": 1.3877551020408165e-05, |
| "loss": 0.0215, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3673469387755102, |
| "grad_norm": 0.37940043210983276, |
| "learning_rate": 1.4285714285714287e-05, |
| "loss": 0.0274, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.37755102040816324, |
| "grad_norm": 0.2848958969116211, |
| "learning_rate": 1.469387755102041e-05, |
| "loss": 0.0212, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.3877551020408163, |
| "grad_norm": 0.3313491940498352, |
| "learning_rate": 1.510204081632653e-05, |
| "loss": 0.0231, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.3979591836734694, |
| "grad_norm": 0.24816128611564636, |
| "learning_rate": 1.5510204081632655e-05, |
| "loss": 0.0159, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.40816326530612246, |
| "grad_norm": 0.2411227524280548, |
| "learning_rate": 1.5918367346938776e-05, |
| "loss": 0.0154, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.41836734693877553, |
| "grad_norm": 0.20029953122138977, |
| "learning_rate": 1.63265306122449e-05, |
| "loss": 0.013, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.23513872921466827, |
| "learning_rate": 1.673469387755102e-05, |
| "loss": 0.0144, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.4387755102040816, |
| "grad_norm": 0.4002116918563843, |
| "learning_rate": 1.7142857142857142e-05, |
| "loss": 0.0353, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.4489795918367347, |
| "grad_norm": 0.27058476209640503, |
| "learning_rate": 1.7551020408163266e-05, |
| "loss": 0.0169, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.45918367346938777, |
| "grad_norm": 0.09177622944116592, |
| "learning_rate": 1.795918367346939e-05, |
| "loss": 0.0055, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.46938775510204084, |
| "grad_norm": 0.16117192804813385, |
| "learning_rate": 1.836734693877551e-05, |
| "loss": 0.0098, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.47959183673469385, |
| "grad_norm": 0.1534506231546402, |
| "learning_rate": 1.8775510204081636e-05, |
| "loss": 0.0071, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.4897959183673469, |
| "grad_norm": 0.2407277524471283, |
| "learning_rate": 1.9183673469387756e-05, |
| "loss": 0.0167, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.1129893809556961, |
| "learning_rate": 1.9591836734693877e-05, |
| "loss": 0.0062, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.5102040816326531, |
| "grad_norm": 0.15642525255680084, |
| "learning_rate": 2e-05, |
| "loss": 0.0064, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5204081632653061, |
| "grad_norm": 0.30184754729270935, |
| "learning_rate": 1.9954648526077098e-05, |
| "loss": 0.0125, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.5306122448979592, |
| "grad_norm": 0.07106052339076996, |
| "learning_rate": 1.9909297052154198e-05, |
| "loss": 0.0044, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.5408163265306123, |
| "grad_norm": 0.26723626255989075, |
| "learning_rate": 1.9863945578231295e-05, |
| "loss": 0.0193, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.5510204081632653, |
| "grad_norm": 0.09843797981739044, |
| "learning_rate": 1.981859410430839e-05, |
| "loss": 0.0058, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.5612244897959183, |
| "grad_norm": 0.07936914265155792, |
| "learning_rate": 1.977324263038549e-05, |
| "loss": 0.0043, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.06574582308530807, |
| "learning_rate": 1.9727891156462588e-05, |
| "loss": 0.0036, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.5816326530612245, |
| "grad_norm": 0.030592354014515877, |
| "learning_rate": 1.9682539682539684e-05, |
| "loss": 0.0018, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.5918367346938775, |
| "grad_norm": 0.07422778010368347, |
| "learning_rate": 1.963718820861678e-05, |
| "loss": 0.0039, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.6020408163265306, |
| "grad_norm": 0.05910489708185196, |
| "learning_rate": 1.9591836734693877e-05, |
| "loss": 0.0031, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.6122448979591837, |
| "grad_norm": 0.035012971609830856, |
| "learning_rate": 1.9546485260770977e-05, |
| "loss": 0.0019, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6224489795918368, |
| "grad_norm": 0.05377289652824402, |
| "learning_rate": 1.9501133786848074e-05, |
| "loss": 0.003, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.6326530612244898, |
| "grad_norm": 0.059411946684122086, |
| "learning_rate": 1.945578231292517e-05, |
| "loss": 0.003, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.07829850167036057, |
| "learning_rate": 1.941043083900227e-05, |
| "loss": 0.0039, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.6530612244897959, |
| "grad_norm": 0.1004122868180275, |
| "learning_rate": 1.9365079365079367e-05, |
| "loss": 0.0048, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.6632653061224489, |
| "grad_norm": 0.022585352882742882, |
| "learning_rate": 1.9319727891156463e-05, |
| "loss": 0.0013, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.673469387755102, |
| "grad_norm": 0.08342932909727097, |
| "learning_rate": 1.9274376417233563e-05, |
| "loss": 0.0039, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.6836734693877551, |
| "grad_norm": 0.3428645730018616, |
| "learning_rate": 1.922902494331066e-05, |
| "loss": 0.0113, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.6938775510204082, |
| "grad_norm": 0.08267664909362793, |
| "learning_rate": 1.9183673469387756e-05, |
| "loss": 0.0042, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.7040816326530612, |
| "grad_norm": 0.07195252925157547, |
| "learning_rate": 1.9138321995464853e-05, |
| "loss": 0.0029, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.024874132126569748, |
| "learning_rate": 1.9092970521541953e-05, |
| "loss": 0.0014, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7244897959183674, |
| "grad_norm": 0.03532341867685318, |
| "learning_rate": 1.904761904761905e-05, |
| "loss": 0.0012, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.7346938775510204, |
| "grad_norm": 0.01858861930668354, |
| "learning_rate": 1.9002267573696146e-05, |
| "loss": 0.001, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.7448979591836735, |
| "grad_norm": 0.36321982741355896, |
| "learning_rate": 1.8956916099773243e-05, |
| "loss": 0.0128, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.7551020408163265, |
| "grad_norm": 0.20222659409046173, |
| "learning_rate": 1.8911564625850343e-05, |
| "loss": 0.0076, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.7653061224489796, |
| "grad_norm": 0.07980707287788391, |
| "learning_rate": 1.886621315192744e-05, |
| "loss": 0.0031, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.7755102040816326, |
| "grad_norm": 0.020555464550852776, |
| "learning_rate": 1.8820861678004536e-05, |
| "loss": 0.0012, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 0.02769128419458866, |
| "learning_rate": 1.8775510204081636e-05, |
| "loss": 0.0014, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.7959183673469388, |
| "grad_norm": 0.030886279419064522, |
| "learning_rate": 1.8730158730158732e-05, |
| "loss": 0.0015, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.8061224489795918, |
| "grad_norm": 0.04239689186215401, |
| "learning_rate": 1.868480725623583e-05, |
| "loss": 0.0017, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.8163265306122449, |
| "grad_norm": 0.03217000514268875, |
| "learning_rate": 1.863945578231293e-05, |
| "loss": 0.0014, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.826530612244898, |
| "grad_norm": 0.029874522238969803, |
| "learning_rate": 1.8594104308390025e-05, |
| "loss": 0.0015, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.8367346938775511, |
| "grad_norm": 0.02627841755747795, |
| "learning_rate": 1.8548752834467122e-05, |
| "loss": 0.0013, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.8469387755102041, |
| "grad_norm": 0.019378235563635826, |
| "learning_rate": 1.8503401360544218e-05, |
| "loss": 0.001, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.08690612763166428, |
| "learning_rate": 1.8458049886621315e-05, |
| "loss": 0.0021, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.8673469387755102, |
| "grad_norm": 0.014376318082213402, |
| "learning_rate": 1.8412698412698415e-05, |
| "loss": 0.0008, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.8775510204081632, |
| "grad_norm": 0.01724099926650524, |
| "learning_rate": 1.836734693877551e-05, |
| "loss": 0.0009, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.8877551020408163, |
| "grad_norm": 0.3265489339828491, |
| "learning_rate": 1.8321995464852608e-05, |
| "loss": 0.0117, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.8979591836734694, |
| "grad_norm": 0.09740184992551804, |
| "learning_rate": 1.8276643990929708e-05, |
| "loss": 0.003, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.9081632653061225, |
| "grad_norm": 0.015478034503757954, |
| "learning_rate": 1.8231292517006804e-05, |
| "loss": 0.0008, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.9183673469387755, |
| "grad_norm": 0.18761862814426422, |
| "learning_rate": 1.81859410430839e-05, |
| "loss": 0.0068, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 0.038408756256103516, |
| "learning_rate": 1.8140589569161e-05, |
| "loss": 0.0014, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.9387755102040817, |
| "grad_norm": 0.03130817040801048, |
| "learning_rate": 1.8095238095238097e-05, |
| "loss": 0.0014, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.9489795918367347, |
| "grad_norm": 0.014020106755197048, |
| "learning_rate": 1.8049886621315194e-05, |
| "loss": 0.0007, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.9591836734693877, |
| "grad_norm": 0.02029995806515217, |
| "learning_rate": 1.8004535147392294e-05, |
| "loss": 0.0011, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.9693877551020408, |
| "grad_norm": 0.021185798570513725, |
| "learning_rate": 1.795918367346939e-05, |
| "loss": 0.0009, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.9795918367346939, |
| "grad_norm": 0.014589856378734112, |
| "learning_rate": 1.7913832199546487e-05, |
| "loss": 0.0008, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.9897959183673469, |
| "grad_norm": 0.022265039384365082, |
| "learning_rate": 1.7868480725623583e-05, |
| "loss": 0.0011, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0408700592815876, |
| "learning_rate": 1.782312925170068e-05, |
| "loss": 0.0011, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.010204081632653, |
| "grad_norm": 0.009026318788528442, |
| "learning_rate": 1.7777777777777777e-05, |
| "loss": 0.0005, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.0204081632653061, |
| "grad_norm": 0.007882497273385525, |
| "learning_rate": 1.7732426303854877e-05, |
| "loss": 0.0005, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.030612244897959, |
| "grad_norm": 0.025666292756795883, |
| "learning_rate": 1.7687074829931973e-05, |
| "loss": 0.0012, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.0408163265306123, |
| "grad_norm": 0.01795661635696888, |
| "learning_rate": 1.7641723356009073e-05, |
| "loss": 0.0008, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.0510204081632653, |
| "grad_norm": 0.04071149602532387, |
| "learning_rate": 1.759637188208617e-05, |
| "loss": 0.0016, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.0612244897959184, |
| "grad_norm": 0.007932674139738083, |
| "learning_rate": 1.7551020408163266e-05, |
| "loss": 0.0005, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.03695099800825119, |
| "learning_rate": 1.7505668934240366e-05, |
| "loss": 0.0015, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.0816326530612246, |
| "grad_norm": 0.008060461841523647, |
| "learning_rate": 1.7460317460317463e-05, |
| "loss": 0.0005, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.0918367346938775, |
| "grad_norm": 0.04425932839512825, |
| "learning_rate": 1.741496598639456e-05, |
| "loss": 0.0018, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.1020408163265305, |
| "grad_norm": 0.010241498239338398, |
| "learning_rate": 1.736961451247166e-05, |
| "loss": 0.0006, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.1122448979591837, |
| "grad_norm": 0.010430874302983284, |
| "learning_rate": 1.7324263038548756e-05, |
| "loss": 0.0006, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.1224489795918366, |
| "grad_norm": 0.16115950047969818, |
| "learning_rate": 1.7278911564625852e-05, |
| "loss": 0.0043, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.1326530612244898, |
| "grad_norm": 0.018837768584489822, |
| "learning_rate": 1.723356009070295e-05, |
| "loss": 0.0007, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.016730893403291702, |
| "learning_rate": 1.7188208616780045e-05, |
| "loss": 0.0009, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.153061224489796, |
| "grad_norm": 0.011841993778944016, |
| "learning_rate": 1.7142857142857142e-05, |
| "loss": 0.0007, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.163265306122449, |
| "grad_norm": 0.045097168534994125, |
| "learning_rate": 1.7097505668934242e-05, |
| "loss": 0.0019, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.1734693877551021, |
| "grad_norm": 0.09953276813030243, |
| "learning_rate": 1.705215419501134e-05, |
| "loss": 0.0032, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.183673469387755, |
| "grad_norm": 0.007014868780970573, |
| "learning_rate": 1.7006802721088435e-05, |
| "loss": 0.0004, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.193877551020408, |
| "grad_norm": 0.009045367129147053, |
| "learning_rate": 1.6961451247165535e-05, |
| "loss": 0.0005, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.2040816326530612, |
| "grad_norm": 0.007859342731535435, |
| "learning_rate": 1.691609977324263e-05, |
| "loss": 0.0005, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.2142857142857142, |
| "grad_norm": 0.032524097710847855, |
| "learning_rate": 1.687074829931973e-05, |
| "loss": 0.0009, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.2244897959183674, |
| "grad_norm": 0.05151795968413353, |
| "learning_rate": 1.6825396825396828e-05, |
| "loss": 0.0018, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.2346938775510203, |
| "grad_norm": 0.010988794267177582, |
| "learning_rate": 1.6780045351473924e-05, |
| "loss": 0.0006, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.2448979591836735, |
| "grad_norm": 0.006904716603457928, |
| "learning_rate": 1.673469387755102e-05, |
| "loss": 0.0004, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.2551020408163265, |
| "grad_norm": 0.006797518581151962, |
| "learning_rate": 1.668934240362812e-05, |
| "loss": 0.0004, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.2653061224489797, |
| "grad_norm": 0.01896447129547596, |
| "learning_rate": 1.6643990929705217e-05, |
| "loss": 0.0008, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.2755102040816326, |
| "grad_norm": 0.01258290559053421, |
| "learning_rate": 1.6598639455782314e-05, |
| "loss": 0.0007, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.015102504752576351, |
| "learning_rate": 1.655328798185941e-05, |
| "loss": 0.0006, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.2959183673469388, |
| "grad_norm": 0.005591754335910082, |
| "learning_rate": 1.6507936507936507e-05, |
| "loss": 0.0004, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.306122448979592, |
| "grad_norm": 0.10714168101549149, |
| "learning_rate": 1.6462585034013607e-05, |
| "loss": 0.0032, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.316326530612245, |
| "grad_norm": 0.030577057972550392, |
| "learning_rate": 1.6417233560090704e-05, |
| "loss": 0.0011, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.3265306122448979, |
| "grad_norm": 0.017115091904997826, |
| "learning_rate": 1.63718820861678e-05, |
| "loss": 0.0008, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.336734693877551, |
| "grad_norm": 0.011700804345309734, |
| "learning_rate": 1.63265306122449e-05, |
| "loss": 0.0006, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.346938775510204, |
| "grad_norm": 0.008858302608132362, |
| "learning_rate": 1.6281179138321997e-05, |
| "loss": 0.0004, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.3571428571428572, |
| "grad_norm": 0.00907884445041418, |
| "learning_rate": 1.6235827664399097e-05, |
| "loss": 0.0005, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.3673469387755102, |
| "grad_norm": 0.00645515276119113, |
| "learning_rate": 1.6190476190476193e-05, |
| "loss": 0.0003, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.3775510204081631, |
| "grad_norm": 0.01644102856516838, |
| "learning_rate": 1.614512471655329e-05, |
| "loss": 0.0006, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.3877551020408163, |
| "grad_norm": 0.023088015615940094, |
| "learning_rate": 1.6099773242630386e-05, |
| "loss": 0.0009, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.3979591836734695, |
| "grad_norm": 0.004741874989122152, |
| "learning_rate": 1.6054421768707483e-05, |
| "loss": 0.0003, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.4081632653061225, |
| "grad_norm": 0.005127857904881239, |
| "learning_rate": 1.6009070294784583e-05, |
| "loss": 0.0003, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.4183673469387754, |
| "grad_norm": 0.009942681528627872, |
| "learning_rate": 1.596371882086168e-05, |
| "loss": 0.0005, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.009013745002448559, |
| "learning_rate": 1.5918367346938776e-05, |
| "loss": 0.0005, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.4387755102040816, |
| "grad_norm": 0.008382913656532764, |
| "learning_rate": 1.5873015873015872e-05, |
| "loss": 0.0005, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.4489795918367347, |
| "grad_norm": 0.01376293320208788, |
| "learning_rate": 1.5827664399092972e-05, |
| "loss": 0.0006, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.4591836734693877, |
| "grad_norm": 0.07127456367015839, |
| "learning_rate": 1.578231292517007e-05, |
| "loss": 0.0022, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.469387755102041, |
| "grad_norm": 0.006247013341635466, |
| "learning_rate": 1.5736961451247165e-05, |
| "loss": 0.0004, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.4795918367346939, |
| "grad_norm": 0.03836556524038315, |
| "learning_rate": 1.5691609977324265e-05, |
| "loss": 0.0012, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.489795918367347, |
| "grad_norm": 0.011062193661928177, |
| "learning_rate": 1.5646258503401362e-05, |
| "loss": 0.0006, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.005953874904662371, |
| "learning_rate": 1.5600907029478462e-05, |
| "loss": 0.0003, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.510204081632653, |
| "grad_norm": 0.014096422120928764, |
| "learning_rate": 1.555555555555556e-05, |
| "loss": 0.0008, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.5204081632653061, |
| "grad_norm": 0.0064276340417563915, |
| "learning_rate": 1.5510204081632655e-05, |
| "loss": 0.0004, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.5306122448979593, |
| "grad_norm": 0.007169738411903381, |
| "learning_rate": 1.546485260770975e-05, |
| "loss": 0.0004, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.5408163265306123, |
| "grad_norm": 0.005434677470475435, |
| "learning_rate": 1.5419501133786848e-05, |
| "loss": 0.0004, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.5510204081632653, |
| "grad_norm": 0.008770623244345188, |
| "learning_rate": 1.5374149659863945e-05, |
| "loss": 0.0004, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.5612244897959182, |
| "grad_norm": 0.011159502901136875, |
| "learning_rate": 1.5328798185941044e-05, |
| "loss": 0.0007, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.10471421480178833, |
| "learning_rate": 1.528344671201814e-05, |
| "loss": 0.0022, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.5816326530612246, |
| "grad_norm": 0.008612933568656445, |
| "learning_rate": 1.523809523809524e-05, |
| "loss": 0.0005, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.5918367346938775, |
| "grad_norm": 0.006375262048095465, |
| "learning_rate": 1.5192743764172338e-05, |
| "loss": 0.0003, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.6020408163265305, |
| "grad_norm": 0.00903844740241766, |
| "learning_rate": 1.5147392290249434e-05, |
| "loss": 0.0005, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.6122448979591837, |
| "grad_norm": 0.005267101805657148, |
| "learning_rate": 1.510204081632653e-05, |
| "loss": 0.0003, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.6224489795918369, |
| "grad_norm": 0.006081985309720039, |
| "learning_rate": 1.505668934240363e-05, |
| "loss": 0.0004, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.6326530612244898, |
| "grad_norm": 0.0072037833742797375, |
| "learning_rate": 1.5011337868480727e-05, |
| "loss": 0.0004, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.6428571428571428, |
| "grad_norm": 0.0033731532748788595, |
| "learning_rate": 1.4965986394557825e-05, |
| "loss": 0.0002, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.6530612244897958, |
| "grad_norm": 0.0077390824444592, |
| "learning_rate": 1.4920634920634922e-05, |
| "loss": 0.0005, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.663265306122449, |
| "grad_norm": 0.009692452847957611, |
| "learning_rate": 1.4875283446712018e-05, |
| "loss": 0.0005, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.6734693877551021, |
| "grad_norm": 0.006450532004237175, |
| "learning_rate": 1.4829931972789118e-05, |
| "loss": 0.0003, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.683673469387755, |
| "grad_norm": 0.009719816036522388, |
| "learning_rate": 1.4784580498866215e-05, |
| "loss": 0.0005, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.693877551020408, |
| "grad_norm": 0.010457034222781658, |
| "learning_rate": 1.4739229024943311e-05, |
| "loss": 0.0005, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.7040816326530612, |
| "grad_norm": 0.008015105500817299, |
| "learning_rate": 1.469387755102041e-05, |
| "loss": 0.0004, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.006307144183665514, |
| "learning_rate": 1.4648526077097506e-05, |
| "loss": 0.0003, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.7244897959183674, |
| "grad_norm": 0.005334992427378893, |
| "learning_rate": 1.4603174603174603e-05, |
| "loss": 0.0003, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.7346938775510203, |
| "grad_norm": 0.008067265152931213, |
| "learning_rate": 1.4557823129251703e-05, |
| "loss": 0.0003, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.7448979591836735, |
| "grad_norm": 0.012681787833571434, |
| "learning_rate": 1.45124716553288e-05, |
| "loss": 0.0005, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.7551020408163265, |
| "grad_norm": 0.01536930724978447, |
| "learning_rate": 1.4467120181405896e-05, |
| "loss": 0.0005, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.7653061224489797, |
| "grad_norm": 0.0037332891952246428, |
| "learning_rate": 1.4421768707482994e-05, |
| "loss": 0.0002, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.7755102040816326, |
| "grad_norm": 0.010341755114495754, |
| "learning_rate": 1.4376417233560092e-05, |
| "loss": 0.0005, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.0045587471686303616, |
| "learning_rate": 1.433106575963719e-05, |
| "loss": 0.0003, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.7959183673469388, |
| "grad_norm": 0.016639186069369316, |
| "learning_rate": 1.4285714285714287e-05, |
| "loss": 0.0006, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.806122448979592, |
| "grad_norm": 0.005003046710044146, |
| "learning_rate": 1.4240362811791384e-05, |
| "loss": 0.0003, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.816326530612245, |
| "grad_norm": 0.009210484102368355, |
| "learning_rate": 1.4195011337868484e-05, |
| "loss": 0.0004, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.8265306122448979, |
| "grad_norm": 0.007876208052039146, |
| "learning_rate": 1.414965986394558e-05, |
| "loss": 0.0004, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.836734693877551, |
| "grad_norm": 0.0038002703804522753, |
| "learning_rate": 1.4104308390022677e-05, |
| "loss": 0.0002, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.8469387755102042, |
| "grad_norm": 0.00423433817923069, |
| "learning_rate": 1.4058956916099775e-05, |
| "loss": 0.0002, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.008944015018641949, |
| "learning_rate": 1.4013605442176872e-05, |
| "loss": 0.0005, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.8673469387755102, |
| "grad_norm": 0.004832221195101738, |
| "learning_rate": 1.3968253968253968e-05, |
| "loss": 0.0003, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.8775510204081631, |
| "grad_norm": 0.005358612630516291, |
| "learning_rate": 1.3922902494331068e-05, |
| "loss": 0.0003, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.8877551020408163, |
| "grad_norm": 0.004266591276973486, |
| "learning_rate": 1.3877551020408165e-05, |
| "loss": 0.0002, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.8979591836734695, |
| "grad_norm": 0.004511923063546419, |
| "learning_rate": 1.3832199546485261e-05, |
| "loss": 0.0003, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.9081632653061225, |
| "grad_norm": 0.12353862076997757, |
| "learning_rate": 1.378684807256236e-05, |
| "loss": 0.0032, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.9183673469387754, |
| "grad_norm": 0.009472350589931011, |
| "learning_rate": 1.3741496598639456e-05, |
| "loss": 0.0006, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.9285714285714286, |
| "grad_norm": 0.005253692157566547, |
| "learning_rate": 1.3696145124716554e-05, |
| "loss": 0.0003, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.9387755102040818, |
| "grad_norm": 0.01199701614677906, |
| "learning_rate": 1.3650793650793652e-05, |
| "loss": 0.0005, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.9489795918367347, |
| "grad_norm": 0.006006367038935423, |
| "learning_rate": 1.3605442176870749e-05, |
| "loss": 0.0003, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.9591836734693877, |
| "grad_norm": 0.010423636995255947, |
| "learning_rate": 1.3560090702947847e-05, |
| "loss": 0.0004, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.9693877551020407, |
| "grad_norm": 0.006484678015112877, |
| "learning_rate": 1.3514739229024945e-05, |
| "loss": 0.0004, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.9795918367346939, |
| "grad_norm": 0.007823942229151726, |
| "learning_rate": 1.3469387755102042e-05, |
| "loss": 0.0004, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.989795918367347, |
| "grad_norm": 0.005013170652091503, |
| "learning_rate": 1.342403628117914e-05, |
| "loss": 0.0003, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.003286719787865877, |
| "learning_rate": 1.3378684807256237e-05, |
| "loss": 0.0001, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.010204081632653, |
| "grad_norm": 0.007182662840932608, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.0003, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.020408163265306, |
| "grad_norm": 0.007124132476747036, |
| "learning_rate": 1.3287981859410433e-05, |
| "loss": 0.0003, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.0306122448979593, |
| "grad_norm": 0.0033109758514910936, |
| "learning_rate": 1.324263038548753e-05, |
| "loss": 0.0002, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.0408163265306123, |
| "grad_norm": 0.0030752080492675304, |
| "learning_rate": 1.3197278911564626e-05, |
| "loss": 0.0002, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.0510204081632653, |
| "grad_norm": 0.003937916364520788, |
| "learning_rate": 1.3151927437641725e-05, |
| "loss": 0.0003, |
| "step": 201 |
| }, |
| { |
| "epoch": 2.061224489795918, |
| "grad_norm": 0.003902744734659791, |
| "learning_rate": 1.3106575963718821e-05, |
| "loss": 0.0002, |
| "step": 202 |
| }, |
| { |
| "epoch": 2.0714285714285716, |
| "grad_norm": 0.003365420503541827, |
| "learning_rate": 1.3061224489795918e-05, |
| "loss": 0.0002, |
| "step": 203 |
| }, |
| { |
| "epoch": 2.0816326530612246, |
| "grad_norm": 0.005177025683224201, |
| "learning_rate": 1.3015873015873018e-05, |
| "loss": 0.0003, |
| "step": 204 |
| }, |
| { |
| "epoch": 2.0918367346938775, |
| "grad_norm": 0.004482835531234741, |
| "learning_rate": 1.2970521541950114e-05, |
| "loss": 0.0003, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.1020408163265305, |
| "grad_norm": 0.03137246519327164, |
| "learning_rate": 1.2925170068027212e-05, |
| "loss": 0.0008, |
| "step": 206 |
| }, |
| { |
| "epoch": 2.1122448979591835, |
| "grad_norm": 0.0061664879322052, |
| "learning_rate": 1.2879818594104309e-05, |
| "loss": 0.0004, |
| "step": 207 |
| }, |
| { |
| "epoch": 2.122448979591837, |
| "grad_norm": 0.007974425330758095, |
| "learning_rate": 1.2834467120181407e-05, |
| "loss": 0.0004, |
| "step": 208 |
| }, |
| { |
| "epoch": 2.13265306122449, |
| "grad_norm": 0.008234084583818913, |
| "learning_rate": 1.2789115646258505e-05, |
| "loss": 0.0004, |
| "step": 209 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.005270775873214006, |
| "learning_rate": 1.2743764172335602e-05, |
| "loss": 0.0003, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.1530612244897958, |
| "grad_norm": 0.008359711617231369, |
| "learning_rate": 1.2698412698412699e-05, |
| "loss": 0.0004, |
| "step": 211 |
| }, |
| { |
| "epoch": 2.163265306122449, |
| "grad_norm": 0.004007325973361731, |
| "learning_rate": 1.2653061224489798e-05, |
| "loss": 0.0002, |
| "step": 212 |
| }, |
| { |
| "epoch": 2.173469387755102, |
| "grad_norm": 0.004752746783196926, |
| "learning_rate": 1.2607709750566895e-05, |
| "loss": 0.0002, |
| "step": 213 |
| }, |
| { |
| "epoch": 2.183673469387755, |
| "grad_norm": 0.0031563639640808105, |
| "learning_rate": 1.2562358276643992e-05, |
| "loss": 0.0002, |
| "step": 214 |
| }, |
| { |
| "epoch": 2.193877551020408, |
| "grad_norm": 0.003636228386312723, |
| "learning_rate": 1.251700680272109e-05, |
| "loss": 0.0002, |
| "step": 215 |
| }, |
| { |
| "epoch": 2.204081632653061, |
| "grad_norm": 0.0034094173461198807, |
| "learning_rate": 1.2471655328798186e-05, |
| "loss": 0.0002, |
| "step": 216 |
| }, |
| { |
| "epoch": 2.2142857142857144, |
| "grad_norm": 0.004791253712028265, |
| "learning_rate": 1.2426303854875283e-05, |
| "loss": 0.0003, |
| "step": 217 |
| }, |
| { |
| "epoch": 2.2244897959183674, |
| "grad_norm": 0.010279831476509571, |
| "learning_rate": 1.2380952380952383e-05, |
| "loss": 0.0004, |
| "step": 218 |
| }, |
| { |
| "epoch": 2.2346938775510203, |
| "grad_norm": 0.006269859150052071, |
| "learning_rate": 1.233560090702948e-05, |
| "loss": 0.0003, |
| "step": 219 |
| }, |
| { |
| "epoch": 2.2448979591836733, |
| "grad_norm": 0.003878034185618162, |
| "learning_rate": 1.2290249433106578e-05, |
| "loss": 0.0002, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.2551020408163267, |
| "grad_norm": 0.0031356397084891796, |
| "learning_rate": 1.2244897959183674e-05, |
| "loss": 0.0002, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.2653061224489797, |
| "grad_norm": 0.004956800024956465, |
| "learning_rate": 1.219954648526077e-05, |
| "loss": 0.0003, |
| "step": 222 |
| }, |
| { |
| "epoch": 2.2755102040816326, |
| "grad_norm": 0.0036491460632532835, |
| "learning_rate": 1.215419501133787e-05, |
| "loss": 0.0002, |
| "step": 223 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 0.005171376280486584, |
| "learning_rate": 1.2108843537414967e-05, |
| "loss": 0.0003, |
| "step": 224 |
| }, |
| { |
| "epoch": 2.295918367346939, |
| "grad_norm": 0.0029648093041032553, |
| "learning_rate": 1.2063492063492064e-05, |
| "loss": 0.0002, |
| "step": 225 |
| }, |
| { |
| "epoch": 2.306122448979592, |
| "grad_norm": 0.006329487543553114, |
| "learning_rate": 1.2018140589569162e-05, |
| "loss": 0.0003, |
| "step": 226 |
| }, |
| { |
| "epoch": 2.316326530612245, |
| "grad_norm": 0.0031556261237710714, |
| "learning_rate": 1.197278911564626e-05, |
| "loss": 0.0002, |
| "step": 227 |
| }, |
| { |
| "epoch": 2.326530612244898, |
| "grad_norm": 0.009794807992875576, |
| "learning_rate": 1.1927437641723357e-05, |
| "loss": 0.0004, |
| "step": 228 |
| }, |
| { |
| "epoch": 2.336734693877551, |
| "grad_norm": 0.003714526304975152, |
| "learning_rate": 1.1882086167800455e-05, |
| "loss": 0.0002, |
| "step": 229 |
| }, |
| { |
| "epoch": 2.3469387755102042, |
| "grad_norm": 0.0031528149265795946, |
| "learning_rate": 1.1836734693877552e-05, |
| "loss": 0.0002, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.357142857142857, |
| "grad_norm": 0.024612465873360634, |
| "learning_rate": 1.1791383219954648e-05, |
| "loss": 0.001, |
| "step": 231 |
| }, |
| { |
| "epoch": 2.36734693877551, |
| "grad_norm": 0.00424389261752367, |
| "learning_rate": 1.1746031746031748e-05, |
| "loss": 0.0002, |
| "step": 232 |
| }, |
| { |
| "epoch": 2.377551020408163, |
| "grad_norm": 0.01282750815153122, |
| "learning_rate": 1.1700680272108845e-05, |
| "loss": 0.0006, |
| "step": 233 |
| }, |
| { |
| "epoch": 2.387755102040816, |
| "grad_norm": 0.006169433705508709, |
| "learning_rate": 1.1655328798185943e-05, |
| "loss": 0.0003, |
| "step": 234 |
| }, |
| { |
| "epoch": 2.3979591836734695, |
| "grad_norm": 0.005562425125390291, |
| "learning_rate": 1.160997732426304e-05, |
| "loss": 0.0003, |
| "step": 235 |
| }, |
| { |
| "epoch": 2.4081632653061225, |
| "grad_norm": 0.01002059318125248, |
| "learning_rate": 1.1564625850340136e-05, |
| "loss": 0.0005, |
| "step": 236 |
| }, |
| { |
| "epoch": 2.4183673469387754, |
| "grad_norm": 0.007645392790436745, |
| "learning_rate": 1.1519274376417236e-05, |
| "loss": 0.0004, |
| "step": 237 |
| }, |
| { |
| "epoch": 2.4285714285714284, |
| "grad_norm": 0.03314538300037384, |
| "learning_rate": 1.1473922902494332e-05, |
| "loss": 0.0011, |
| "step": 238 |
| }, |
| { |
| "epoch": 2.438775510204082, |
| "grad_norm": 0.04838201776146889, |
| "learning_rate": 1.1428571428571429e-05, |
| "loss": 0.0009, |
| "step": 239 |
| }, |
| { |
| "epoch": 2.4489795918367347, |
| "grad_norm": 0.006126615218818188, |
| "learning_rate": 1.1383219954648527e-05, |
| "loss": 0.0004, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.4591836734693877, |
| "grad_norm": 0.005240059457719326, |
| "learning_rate": 1.1337868480725624e-05, |
| "loss": 0.0003, |
| "step": 241 |
| }, |
| { |
| "epoch": 2.4693877551020407, |
| "grad_norm": 0.006122751161456108, |
| "learning_rate": 1.1292517006802722e-05, |
| "loss": 0.0003, |
| "step": 242 |
| }, |
| { |
| "epoch": 2.479591836734694, |
| "grad_norm": 0.0024781699758023024, |
| "learning_rate": 1.124716553287982e-05, |
| "loss": 0.0002, |
| "step": 243 |
| }, |
| { |
| "epoch": 2.489795918367347, |
| "grad_norm": 0.00649678660556674, |
| "learning_rate": 1.1201814058956917e-05, |
| "loss": 0.0004, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.003478443017229438, |
| "learning_rate": 1.1156462585034013e-05, |
| "loss": 0.0002, |
| "step": 245 |
| }, |
| { |
| "epoch": 2.510204081632653, |
| "grad_norm": 0.003858257783576846, |
| "learning_rate": 1.1111111111111113e-05, |
| "loss": 0.0002, |
| "step": 246 |
| }, |
| { |
| "epoch": 2.520408163265306, |
| "grad_norm": 0.006577960215508938, |
| "learning_rate": 1.106575963718821e-05, |
| "loss": 0.0004, |
| "step": 247 |
| }, |
| { |
| "epoch": 2.5306122448979593, |
| "grad_norm": 0.004543396644294262, |
| "learning_rate": 1.1020408163265306e-05, |
| "loss": 0.0003, |
| "step": 248 |
| }, |
| { |
| "epoch": 2.5408163265306123, |
| "grad_norm": 0.0032837800681591034, |
| "learning_rate": 1.0975056689342405e-05, |
| "loss": 0.0002, |
| "step": 249 |
| }, |
| { |
| "epoch": 2.5510204081632653, |
| "grad_norm": 0.012741784565150738, |
| "learning_rate": 1.0929705215419501e-05, |
| "loss": 0.0006, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.561224489795918, |
| "grad_norm": 0.002741026459261775, |
| "learning_rate": 1.0884353741496601e-05, |
| "loss": 0.0002, |
| "step": 251 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.0027930692303925753, |
| "learning_rate": 1.0839002267573698e-05, |
| "loss": 0.0002, |
| "step": 252 |
| }, |
| { |
| "epoch": 2.5816326530612246, |
| "grad_norm": 0.003156407503411174, |
| "learning_rate": 1.0793650793650794e-05, |
| "loss": 0.0002, |
| "step": 253 |
| }, |
| { |
| "epoch": 2.5918367346938775, |
| "grad_norm": 0.0036412908229976892, |
| "learning_rate": 1.0748299319727893e-05, |
| "loss": 0.0002, |
| "step": 254 |
| }, |
| { |
| "epoch": 2.6020408163265305, |
| "grad_norm": 0.04034988954663277, |
| "learning_rate": 1.0702947845804989e-05, |
| "loss": 0.0013, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.612244897959184, |
| "grad_norm": 0.0034895159769803286, |
| "learning_rate": 1.0657596371882086e-05, |
| "loss": 0.0002, |
| "step": 256 |
| }, |
| { |
| "epoch": 2.622448979591837, |
| "grad_norm": 0.04325950890779495, |
| "learning_rate": 1.0612244897959186e-05, |
| "loss": 0.0012, |
| "step": 257 |
| }, |
| { |
| "epoch": 2.63265306122449, |
| "grad_norm": 0.004671269562095404, |
| "learning_rate": 1.0566893424036282e-05, |
| "loss": 0.0003, |
| "step": 258 |
| }, |
| { |
| "epoch": 2.642857142857143, |
| "grad_norm": 0.002534637926146388, |
| "learning_rate": 1.0521541950113379e-05, |
| "loss": 0.0002, |
| "step": 259 |
| }, |
| { |
| "epoch": 2.6530612244897958, |
| "grad_norm": 0.004457306116819382, |
| "learning_rate": 1.0476190476190477e-05, |
| "loss": 0.0003, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.663265306122449, |
| "grad_norm": 0.004050545394420624, |
| "learning_rate": 1.0430839002267575e-05, |
| "loss": 0.0002, |
| "step": 261 |
| }, |
| { |
| "epoch": 2.673469387755102, |
| "grad_norm": 0.04582836106419563, |
| "learning_rate": 1.0385487528344672e-05, |
| "loss": 0.0011, |
| "step": 262 |
| }, |
| { |
| "epoch": 2.683673469387755, |
| "grad_norm": 0.004835136700421572, |
| "learning_rate": 1.034013605442177e-05, |
| "loss": 0.0003, |
| "step": 263 |
| }, |
| { |
| "epoch": 2.693877551020408, |
| "grad_norm": 0.008025884628295898, |
| "learning_rate": 1.0294784580498866e-05, |
| "loss": 0.0003, |
| "step": 264 |
| }, |
| { |
| "epoch": 2.704081632653061, |
| "grad_norm": 0.007876653224229813, |
| "learning_rate": 1.0249433106575966e-05, |
| "loss": 0.0004, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.7142857142857144, |
| "grad_norm": 0.006527318619191647, |
| "learning_rate": 1.0204081632653063e-05, |
| "loss": 0.0003, |
| "step": 266 |
| }, |
| { |
| "epoch": 2.7244897959183674, |
| "grad_norm": 0.002143925055861473, |
| "learning_rate": 1.015873015873016e-05, |
| "loss": 0.0001, |
| "step": 267 |
| }, |
| { |
| "epoch": 2.7346938775510203, |
| "grad_norm": 0.003183850785717368, |
| "learning_rate": 1.0113378684807258e-05, |
| "loss": 0.0002, |
| "step": 268 |
| }, |
| { |
| "epoch": 2.7448979591836737, |
| "grad_norm": 0.003816920565441251, |
| "learning_rate": 1.0068027210884354e-05, |
| "loss": 0.0002, |
| "step": 269 |
| }, |
| { |
| "epoch": 2.7551020408163263, |
| "grad_norm": 0.005489765666425228, |
| "learning_rate": 1.0022675736961451e-05, |
| "loss": 0.0003, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.7653061224489797, |
| "grad_norm": 0.002469045575708151, |
| "learning_rate": 9.977324263038549e-06, |
| "loss": 0.0002, |
| "step": 271 |
| }, |
| { |
| "epoch": 2.7755102040816326, |
| "grad_norm": 0.0031796926632523537, |
| "learning_rate": 9.931972789115647e-06, |
| "loss": 0.0002, |
| "step": 272 |
| }, |
| { |
| "epoch": 2.7857142857142856, |
| "grad_norm": 0.003679267829284072, |
| "learning_rate": 9.886621315192746e-06, |
| "loss": 0.0002, |
| "step": 273 |
| }, |
| { |
| "epoch": 2.795918367346939, |
| "grad_norm": 0.010035431012511253, |
| "learning_rate": 9.841269841269842e-06, |
| "loss": 0.0004, |
| "step": 274 |
| }, |
| { |
| "epoch": 2.806122448979592, |
| "grad_norm": 0.0031564754899591208, |
| "learning_rate": 9.795918367346939e-06, |
| "loss": 0.0002, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.816326530612245, |
| "grad_norm": 0.005581808276474476, |
| "learning_rate": 9.750566893424037e-06, |
| "loss": 0.0003, |
| "step": 276 |
| }, |
| { |
| "epoch": 2.826530612244898, |
| "grad_norm": 0.002813218394294381, |
| "learning_rate": 9.705215419501135e-06, |
| "loss": 0.0002, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.836734693877551, |
| "grad_norm": 0.003005703678354621, |
| "learning_rate": 9.659863945578232e-06, |
| "loss": 0.0002, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.8469387755102042, |
| "grad_norm": 0.0068191043101251125, |
| "learning_rate": 9.61451247165533e-06, |
| "loss": 0.0004, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.008098878897726536, |
| "learning_rate": 9.569160997732427e-06, |
| "loss": 0.0004, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.86734693877551, |
| "grad_norm": 0.014086649753153324, |
| "learning_rate": 9.523809523809525e-06, |
| "loss": 0.0004, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.877551020408163, |
| "grad_norm": 0.004192018415778875, |
| "learning_rate": 9.478458049886621e-06, |
| "loss": 0.0002, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.887755102040816, |
| "grad_norm": 0.0025980896316468716, |
| "learning_rate": 9.43310657596372e-06, |
| "loss": 0.0002, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.8979591836734695, |
| "grad_norm": 0.010852901265025139, |
| "learning_rate": 9.387755102040818e-06, |
| "loss": 0.0004, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.9081632653061225, |
| "grad_norm": 0.003038214286789298, |
| "learning_rate": 9.342403628117914e-06, |
| "loss": 0.0002, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.9183673469387754, |
| "grad_norm": 0.0038609837647527456, |
| "learning_rate": 9.297052154195013e-06, |
| "loss": 0.0002, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.928571428571429, |
| "grad_norm": 0.0073219058103859425, |
| "learning_rate": 9.251700680272109e-06, |
| "loss": 0.0004, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.938775510204082, |
| "grad_norm": 0.004274952691048384, |
| "learning_rate": 9.206349206349207e-06, |
| "loss": 0.0003, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.9489795918367347, |
| "grad_norm": 0.003952549304813147, |
| "learning_rate": 9.160997732426304e-06, |
| "loss": 0.0002, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.9591836734693877, |
| "grad_norm": 0.03005502186715603, |
| "learning_rate": 9.115646258503402e-06, |
| "loss": 0.0006, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.9693877551020407, |
| "grad_norm": 0.0023858973290771246, |
| "learning_rate": 9.0702947845805e-06, |
| "loss": 0.0002, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.979591836734694, |
| "grad_norm": 0.004339877981692553, |
| "learning_rate": 9.024943310657597e-06, |
| "loss": 0.0003, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.989795918367347, |
| "grad_norm": 0.006001957226544619, |
| "learning_rate": 8.979591836734695e-06, |
| "loss": 0.0003, |
| "step": 293 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.007597202900797129, |
| "learning_rate": 8.934240362811792e-06, |
| "loss": 0.0002, |
| "step": 294 |
| }, |
| { |
| "epoch": 3.010204081632653, |
| "grad_norm": 0.003272986738011241, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.0002, |
| "step": 295 |
| }, |
| { |
| "epoch": 3.020408163265306, |
| "grad_norm": 0.0023373092990368605, |
| "learning_rate": 8.843537414965987e-06, |
| "loss": 0.0001, |
| "step": 296 |
| }, |
| { |
| "epoch": 3.0306122448979593, |
| "grad_norm": 0.0037628381978720427, |
| "learning_rate": 8.798185941043085e-06, |
| "loss": 0.0002, |
| "step": 297 |
| }, |
| { |
| "epoch": 3.0408163265306123, |
| "grad_norm": 0.011344632133841515, |
| "learning_rate": 8.752834467120183e-06, |
| "loss": 0.0005, |
| "step": 298 |
| }, |
| { |
| "epoch": 3.0510204081632653, |
| "grad_norm": 0.009169838391244411, |
| "learning_rate": 8.70748299319728e-06, |
| "loss": 0.0004, |
| "step": 299 |
| }, |
| { |
| "epoch": 3.061224489795918, |
| "grad_norm": 0.019571438431739807, |
| "learning_rate": 8.662131519274378e-06, |
| "loss": 0.0005, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.0714285714285716, |
| "grad_norm": 0.0035050984006375074, |
| "learning_rate": 8.616780045351474e-06, |
| "loss": 0.0002, |
| "step": 301 |
| }, |
| { |
| "epoch": 3.0816326530612246, |
| "grad_norm": 0.004051654599606991, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 0.0002, |
| "step": 302 |
| }, |
| { |
| "epoch": 3.0918367346938775, |
| "grad_norm": 0.002926639514043927, |
| "learning_rate": 8.52607709750567e-06, |
| "loss": 0.0002, |
| "step": 303 |
| }, |
| { |
| "epoch": 3.1020408163265305, |
| "grad_norm": 0.013055351562798023, |
| "learning_rate": 8.480725623582767e-06, |
| "loss": 0.0004, |
| "step": 304 |
| }, |
| { |
| "epoch": 3.1122448979591835, |
| "grad_norm": 0.004692048765718937, |
| "learning_rate": 8.435374149659866e-06, |
| "loss": 0.0002, |
| "step": 305 |
| }, |
| { |
| "epoch": 3.122448979591837, |
| "grad_norm": 0.0025202229153364897, |
| "learning_rate": 8.390022675736962e-06, |
| "loss": 0.0002, |
| "step": 306 |
| }, |
| { |
| "epoch": 3.13265306122449, |
| "grad_norm": 0.005598872900009155, |
| "learning_rate": 8.34467120181406e-06, |
| "loss": 0.0002, |
| "step": 307 |
| }, |
| { |
| "epoch": 3.142857142857143, |
| "grad_norm": 0.0032469748985022306, |
| "learning_rate": 8.299319727891157e-06, |
| "loss": 0.0002, |
| "step": 308 |
| }, |
| { |
| "epoch": 3.1530612244897958, |
| "grad_norm": 0.004803687799721956, |
| "learning_rate": 8.253968253968254e-06, |
| "loss": 0.0003, |
| "step": 309 |
| }, |
| { |
| "epoch": 3.163265306122449, |
| "grad_norm": 0.0046676271595060825, |
| "learning_rate": 8.208616780045352e-06, |
| "loss": 0.0003, |
| "step": 310 |
| }, |
| { |
| "epoch": 3.173469387755102, |
| "grad_norm": 0.002468443475663662, |
| "learning_rate": 8.16326530612245e-06, |
| "loss": 0.0002, |
| "step": 311 |
| }, |
| { |
| "epoch": 3.183673469387755, |
| "grad_norm": 0.006342902779579163, |
| "learning_rate": 8.117913832199548e-06, |
| "loss": 0.0004, |
| "step": 312 |
| }, |
| { |
| "epoch": 3.193877551020408, |
| "grad_norm": 0.0023443913087248802, |
| "learning_rate": 8.072562358276645e-06, |
| "loss": 0.0002, |
| "step": 313 |
| }, |
| { |
| "epoch": 3.204081632653061, |
| "grad_norm": 0.0020017994102090597, |
| "learning_rate": 8.027210884353741e-06, |
| "loss": 0.0001, |
| "step": 314 |
| }, |
| { |
| "epoch": 3.2142857142857144, |
| "grad_norm": 0.0026365304365754128, |
| "learning_rate": 7.98185941043084e-06, |
| "loss": 0.0002, |
| "step": 315 |
| }, |
| { |
| "epoch": 3.2244897959183674, |
| "grad_norm": 0.0056705656461417675, |
| "learning_rate": 7.936507936507936e-06, |
| "loss": 0.0004, |
| "step": 316 |
| }, |
| { |
| "epoch": 3.2346938775510203, |
| "grad_norm": 0.009689562022686005, |
| "learning_rate": 7.891156462585034e-06, |
| "loss": 0.0004, |
| "step": 317 |
| }, |
| { |
| "epoch": 3.2448979591836733, |
| "grad_norm": 0.008008199743926525, |
| "learning_rate": 7.845804988662133e-06, |
| "loss": 0.0003, |
| "step": 318 |
| }, |
| { |
| "epoch": 3.2551020408163267, |
| "grad_norm": 0.0026869464199990034, |
| "learning_rate": 7.800453514739231e-06, |
| "loss": 0.0002, |
| "step": 319 |
| }, |
| { |
| "epoch": 3.2653061224489797, |
| "grad_norm": 0.0031625712290406227, |
| "learning_rate": 7.755102040816327e-06, |
| "loss": 0.0002, |
| "step": 320 |
| }, |
| { |
| "epoch": 3.2755102040816326, |
| "grad_norm": 0.0028163609094917774, |
| "learning_rate": 7.709750566893424e-06, |
| "loss": 0.0002, |
| "step": 321 |
| }, |
| { |
| "epoch": 3.2857142857142856, |
| "grad_norm": 0.005660755559802055, |
| "learning_rate": 7.664399092970522e-06, |
| "loss": 0.0003, |
| "step": 322 |
| }, |
| { |
| "epoch": 3.295918367346939, |
| "grad_norm": 0.0039995694532990456, |
| "learning_rate": 7.61904761904762e-06, |
| "loss": 0.0003, |
| "step": 323 |
| }, |
| { |
| "epoch": 3.306122448979592, |
| "grad_norm": 0.005670357029885054, |
| "learning_rate": 7.573696145124717e-06, |
| "loss": 0.0003, |
| "step": 324 |
| }, |
| { |
| "epoch": 3.316326530612245, |
| "grad_norm": 0.0025813328102231026, |
| "learning_rate": 7.528344671201815e-06, |
| "loss": 0.0002, |
| "step": 325 |
| }, |
| { |
| "epoch": 3.326530612244898, |
| "grad_norm": 0.0030447246972471476, |
| "learning_rate": 7.482993197278913e-06, |
| "loss": 0.0002, |
| "step": 326 |
| }, |
| { |
| "epoch": 3.336734693877551, |
| "grad_norm": 0.0019142641685903072, |
| "learning_rate": 7.437641723356009e-06, |
| "loss": 0.0001, |
| "step": 327 |
| }, |
| { |
| "epoch": 3.3469387755102042, |
| "grad_norm": 0.004176548682153225, |
| "learning_rate": 7.3922902494331075e-06, |
| "loss": 0.0002, |
| "step": 328 |
| }, |
| { |
| "epoch": 3.357142857142857, |
| "grad_norm": 0.009249200113117695, |
| "learning_rate": 7.346938775510205e-06, |
| "loss": 0.0004, |
| "step": 329 |
| }, |
| { |
| "epoch": 3.36734693877551, |
| "grad_norm": 0.005048077553510666, |
| "learning_rate": 7.301587301587301e-06, |
| "loss": 0.0002, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.377551020408163, |
| "grad_norm": 0.0024696551263332367, |
| "learning_rate": 7.2562358276644e-06, |
| "loss": 0.0002, |
| "step": 331 |
| }, |
| { |
| "epoch": 3.387755102040816, |
| "grad_norm": 0.002270912518724799, |
| "learning_rate": 7.210884353741497e-06, |
| "loss": 0.0002, |
| "step": 332 |
| }, |
| { |
| "epoch": 3.3979591836734695, |
| "grad_norm": 0.0020916208159178495, |
| "learning_rate": 7.165532879818595e-06, |
| "loss": 0.0001, |
| "step": 333 |
| }, |
| { |
| "epoch": 3.4081632653061225, |
| "grad_norm": 0.002925699343904853, |
| "learning_rate": 7.120181405895692e-06, |
| "loss": 0.0002, |
| "step": 334 |
| }, |
| { |
| "epoch": 3.4183673469387754, |
| "grad_norm": 0.003520503407344222, |
| "learning_rate": 7.07482993197279e-06, |
| "loss": 0.0002, |
| "step": 335 |
| }, |
| { |
| "epoch": 3.4285714285714284, |
| "grad_norm": 0.0024117021821439266, |
| "learning_rate": 7.0294784580498875e-06, |
| "loss": 0.0001, |
| "step": 336 |
| }, |
| { |
| "epoch": 3.438775510204082, |
| "grad_norm": 0.011653084307909012, |
| "learning_rate": 6.984126984126984e-06, |
| "loss": 0.0005, |
| "step": 337 |
| }, |
| { |
| "epoch": 3.4489795918367347, |
| "grad_norm": 0.0021838736720383167, |
| "learning_rate": 6.938775510204082e-06, |
| "loss": 0.0001, |
| "step": 338 |
| }, |
| { |
| "epoch": 3.4591836734693877, |
| "grad_norm": 0.005142625421285629, |
| "learning_rate": 6.89342403628118e-06, |
| "loss": 0.0003, |
| "step": 339 |
| }, |
| { |
| "epoch": 3.4693877551020407, |
| "grad_norm": 0.009561867453157902, |
| "learning_rate": 6.848072562358277e-06, |
| "loss": 0.0003, |
| "step": 340 |
| }, |
| { |
| "epoch": 3.479591836734694, |
| "grad_norm": 0.0029086614958941936, |
| "learning_rate": 6.8027210884353745e-06, |
| "loss": 0.0002, |
| "step": 341 |
| }, |
| { |
| "epoch": 3.489795918367347, |
| "grad_norm": 0.004574024584144354, |
| "learning_rate": 6.757369614512473e-06, |
| "loss": 0.0002, |
| "step": 342 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.002138937823474407, |
| "learning_rate": 6.71201814058957e-06, |
| "loss": 0.0001, |
| "step": 343 |
| }, |
| { |
| "epoch": 3.510204081632653, |
| "grad_norm": 0.003517791396006942, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.0002, |
| "step": 344 |
| }, |
| { |
| "epoch": 3.520408163265306, |
| "grad_norm": 0.03443054482340813, |
| "learning_rate": 6.621315192743765e-06, |
| "loss": 0.0008, |
| "step": 345 |
| }, |
| { |
| "epoch": 3.5306122448979593, |
| "grad_norm": 0.008042026311159134, |
| "learning_rate": 6.575963718820862e-06, |
| "loss": 0.0002, |
| "step": 346 |
| }, |
| { |
| "epoch": 3.5408163265306123, |
| "grad_norm": 0.0047872308641672134, |
| "learning_rate": 6.530612244897959e-06, |
| "loss": 0.0003, |
| "step": 347 |
| }, |
| { |
| "epoch": 3.5510204081632653, |
| "grad_norm": 0.010120042599737644, |
| "learning_rate": 6.485260770975057e-06, |
| "loss": 0.0003, |
| "step": 348 |
| }, |
| { |
| "epoch": 3.561224489795918, |
| "grad_norm": 0.004412388429045677, |
| "learning_rate": 6.4399092970521545e-06, |
| "loss": 0.0003, |
| "step": 349 |
| }, |
| { |
| "epoch": 3.571428571428571, |
| "grad_norm": 0.003939002752304077, |
| "learning_rate": 6.394557823129253e-06, |
| "loss": 0.0002, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.5816326530612246, |
| "grad_norm": 0.003072823630645871, |
| "learning_rate": 6.349206349206349e-06, |
| "loss": 0.0002, |
| "step": 351 |
| }, |
| { |
| "epoch": 3.5918367346938775, |
| "grad_norm": 0.002594695193693042, |
| "learning_rate": 6.3038548752834475e-06, |
| "loss": 0.0002, |
| "step": 352 |
| }, |
| { |
| "epoch": 3.6020408163265305, |
| "grad_norm": 0.0021781930699944496, |
| "learning_rate": 6.258503401360545e-06, |
| "loss": 0.0001, |
| "step": 353 |
| }, |
| { |
| "epoch": 3.612244897959184, |
| "grad_norm": 0.004957903642207384, |
| "learning_rate": 6.2131519274376415e-06, |
| "loss": 0.0002, |
| "step": 354 |
| }, |
| { |
| "epoch": 3.622448979591837, |
| "grad_norm": 0.0020664699841290712, |
| "learning_rate": 6.16780045351474e-06, |
| "loss": 0.0001, |
| "step": 355 |
| }, |
| { |
| "epoch": 3.63265306122449, |
| "grad_norm": 0.00455419672653079, |
| "learning_rate": 6.122448979591837e-06, |
| "loss": 0.0002, |
| "step": 356 |
| }, |
| { |
| "epoch": 3.642857142857143, |
| "grad_norm": 0.001891249674372375, |
| "learning_rate": 6.077097505668935e-06, |
| "loss": 0.0001, |
| "step": 357 |
| }, |
| { |
| "epoch": 3.6530612244897958, |
| "grad_norm": 0.0015174165600910783, |
| "learning_rate": 6.031746031746032e-06, |
| "loss": 0.0001, |
| "step": 358 |
| }, |
| { |
| "epoch": 3.663265306122449, |
| "grad_norm": 0.008895975537598133, |
| "learning_rate": 5.98639455782313e-06, |
| "loss": 0.0003, |
| "step": 359 |
| }, |
| { |
| "epoch": 3.673469387755102, |
| "grad_norm": 0.010570279322564602, |
| "learning_rate": 5.9410430839002275e-06, |
| "loss": 0.0003, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.683673469387755, |
| "grad_norm": 0.005755205638706684, |
| "learning_rate": 5.895691609977324e-06, |
| "loss": 0.0002, |
| "step": 361 |
| }, |
| { |
| "epoch": 3.693877551020408, |
| "grad_norm": 0.00319477915763855, |
| "learning_rate": 5.850340136054422e-06, |
| "loss": 0.0002, |
| "step": 362 |
| }, |
| { |
| "epoch": 3.704081632653061, |
| "grad_norm": 0.0023295124992728233, |
| "learning_rate": 5.80498866213152e-06, |
| "loss": 0.0001, |
| "step": 363 |
| }, |
| { |
| "epoch": 3.7142857142857144, |
| "grad_norm": 0.0038169752806425095, |
| "learning_rate": 5.759637188208618e-06, |
| "loss": 0.0003, |
| "step": 364 |
| }, |
| { |
| "epoch": 3.7244897959183674, |
| "grad_norm": 0.007799374870955944, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.0003, |
| "step": 365 |
| }, |
| { |
| "epoch": 3.7346938775510203, |
| "grad_norm": 0.002488058526068926, |
| "learning_rate": 5.668934240362812e-06, |
| "loss": 0.0002, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.7448979591836737, |
| "grad_norm": 0.01512609887868166, |
| "learning_rate": 5.62358276643991e-06, |
| "loss": 0.0006, |
| "step": 367 |
| }, |
| { |
| "epoch": 3.7551020408163263, |
| "grad_norm": 0.004572188016027212, |
| "learning_rate": 5.578231292517007e-06, |
| "loss": 0.0003, |
| "step": 368 |
| }, |
| { |
| "epoch": 3.7653061224489797, |
| "grad_norm": 0.0024051195941865444, |
| "learning_rate": 5.532879818594105e-06, |
| "loss": 0.0002, |
| "step": 369 |
| }, |
| { |
| "epoch": 3.7755102040816326, |
| "grad_norm": 0.0032509195152670145, |
| "learning_rate": 5.487528344671202e-06, |
| "loss": 0.0002, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.7857142857142856, |
| "grad_norm": 0.0019066549139097333, |
| "learning_rate": 5.442176870748301e-06, |
| "loss": 0.0001, |
| "step": 371 |
| }, |
| { |
| "epoch": 3.795918367346939, |
| "grad_norm": 0.004059778060764074, |
| "learning_rate": 5.396825396825397e-06, |
| "loss": 0.0002, |
| "step": 372 |
| }, |
| { |
| "epoch": 3.806122448979592, |
| "grad_norm": 0.003823889186605811, |
| "learning_rate": 5.3514739229024945e-06, |
| "loss": 0.0002, |
| "step": 373 |
| }, |
| { |
| "epoch": 3.816326530612245, |
| "grad_norm": 0.005696111358702183, |
| "learning_rate": 5.306122448979593e-06, |
| "loss": 0.0003, |
| "step": 374 |
| }, |
| { |
| "epoch": 3.826530612244898, |
| "grad_norm": 0.002276304177939892, |
| "learning_rate": 5.260770975056689e-06, |
| "loss": 0.0001, |
| "step": 375 |
| }, |
| { |
| "epoch": 3.836734693877551, |
| "grad_norm": 0.003423569956794381, |
| "learning_rate": 5.2154195011337876e-06, |
| "loss": 0.0002, |
| "step": 376 |
| }, |
| { |
| "epoch": 3.8469387755102042, |
| "grad_norm": 0.009261609055101871, |
| "learning_rate": 5.170068027210885e-06, |
| "loss": 0.0004, |
| "step": 377 |
| }, |
| { |
| "epoch": 3.857142857142857, |
| "grad_norm": 0.0026830616407096386, |
| "learning_rate": 5.124716553287983e-06, |
| "loss": 0.0002, |
| "step": 378 |
| }, |
| { |
| "epoch": 3.86734693877551, |
| "grad_norm": 0.007292145863175392, |
| "learning_rate": 5.07936507936508e-06, |
| "loss": 0.0003, |
| "step": 379 |
| }, |
| { |
| "epoch": 3.877551020408163, |
| "grad_norm": 0.001463556895032525, |
| "learning_rate": 5.034013605442177e-06, |
| "loss": 0.0001, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.887755102040816, |
| "grad_norm": 0.0077773998491466045, |
| "learning_rate": 4.9886621315192745e-06, |
| "loss": 0.0003, |
| "step": 381 |
| }, |
| { |
| "epoch": 3.8979591836734695, |
| "grad_norm": 0.001333568710833788, |
| "learning_rate": 4.943310657596373e-06, |
| "loss": 0.0001, |
| "step": 382 |
| }, |
| { |
| "epoch": 3.9081632653061225, |
| "grad_norm": 0.0033744387328624725, |
| "learning_rate": 4.897959183673469e-06, |
| "loss": 0.0002, |
| "step": 383 |
| }, |
| { |
| "epoch": 3.9183673469387754, |
| "grad_norm": 0.0031404553446918726, |
| "learning_rate": 4.852607709750568e-06, |
| "loss": 0.0002, |
| "step": 384 |
| }, |
| { |
| "epoch": 3.928571428571429, |
| "grad_norm": 0.002246819669380784, |
| "learning_rate": 4.807256235827665e-06, |
| "loss": 0.0002, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.938775510204082, |
| "grad_norm": 0.006392229348421097, |
| "learning_rate": 4.761904761904762e-06, |
| "loss": 0.0003, |
| "step": 386 |
| }, |
| { |
| "epoch": 3.9489795918367347, |
| "grad_norm": 0.002482037292793393, |
| "learning_rate": 4.71655328798186e-06, |
| "loss": 0.0002, |
| "step": 387 |
| }, |
| { |
| "epoch": 3.9591836734693877, |
| "grad_norm": 0.0029472103342413902, |
| "learning_rate": 4.671201814058957e-06, |
| "loss": 0.0002, |
| "step": 388 |
| }, |
| { |
| "epoch": 3.9693877551020407, |
| "grad_norm": 0.001341557246632874, |
| "learning_rate": 4.6258503401360546e-06, |
| "loss": 0.0001, |
| "step": 389 |
| }, |
| { |
| "epoch": 3.979591836734694, |
| "grad_norm": 0.003789098234847188, |
| "learning_rate": 4.580498866213152e-06, |
| "loss": 0.0002, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.989795918367347, |
| "grad_norm": 0.0022696068044751883, |
| "learning_rate": 4.53514739229025e-06, |
| "loss": 0.0001, |
| "step": 391 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.0036536771804094315, |
| "learning_rate": 4.489795918367348e-06, |
| "loss": 0.0001, |
| "step": 392 |
| }, |
| { |
| "epoch": 4.010204081632653, |
| "grad_norm": 0.0017391832079738379, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.0001, |
| "step": 393 |
| }, |
| { |
| "epoch": 4.020408163265306, |
| "grad_norm": 0.003093178616836667, |
| "learning_rate": 4.399092970521542e-06, |
| "loss": 0.0002, |
| "step": 394 |
| }, |
| { |
| "epoch": 4.030612244897959, |
| "grad_norm": 0.0017501730471849442, |
| "learning_rate": 4.35374149659864e-06, |
| "loss": 0.0001, |
| "step": 395 |
| }, |
| { |
| "epoch": 4.040816326530612, |
| "grad_norm": 0.03281351551413536, |
| "learning_rate": 4.308390022675737e-06, |
| "loss": 0.0007, |
| "step": 396 |
| }, |
| { |
| "epoch": 4.051020408163265, |
| "grad_norm": 0.002625512657687068, |
| "learning_rate": 4.263038548752835e-06, |
| "loss": 0.0002, |
| "step": 397 |
| }, |
| { |
| "epoch": 4.061224489795919, |
| "grad_norm": 0.003758464241400361, |
| "learning_rate": 4.217687074829933e-06, |
| "loss": 0.0002, |
| "step": 398 |
| }, |
| { |
| "epoch": 4.071428571428571, |
| "grad_norm": 0.0021065385080873966, |
| "learning_rate": 4.17233560090703e-06, |
| "loss": 0.0001, |
| "step": 399 |
| }, |
| { |
| "epoch": 4.081632653061225, |
| "grad_norm": 0.0022317173425108194, |
| "learning_rate": 4.126984126984127e-06, |
| "loss": 0.0001, |
| "step": 400 |
| }, |
| { |
| "epoch": 4.091836734693878, |
| "grad_norm": 0.0025758843403309584, |
| "learning_rate": 4.081632653061225e-06, |
| "loss": 0.0002, |
| "step": 401 |
| }, |
| { |
| "epoch": 4.1020408163265305, |
| "grad_norm": 0.003262228099629283, |
| "learning_rate": 4.036281179138322e-06, |
| "loss": 0.0002, |
| "step": 402 |
| }, |
| { |
| "epoch": 4.112244897959184, |
| "grad_norm": 0.002355805365368724, |
| "learning_rate": 3.99092970521542e-06, |
| "loss": 0.0001, |
| "step": 403 |
| }, |
| { |
| "epoch": 4.122448979591836, |
| "grad_norm": 0.002239174908027053, |
| "learning_rate": 3.945578231292517e-06, |
| "loss": 0.0001, |
| "step": 404 |
| }, |
| { |
| "epoch": 4.13265306122449, |
| "grad_norm": 0.003173491917550564, |
| "learning_rate": 3.9002267573696154e-06, |
| "loss": 0.0002, |
| "step": 405 |
| }, |
| { |
| "epoch": 4.142857142857143, |
| "grad_norm": 0.009472887963056564, |
| "learning_rate": 3.854875283446712e-06, |
| "loss": 0.0004, |
| "step": 406 |
| }, |
| { |
| "epoch": 4.153061224489796, |
| "grad_norm": 0.010682443156838417, |
| "learning_rate": 3.80952380952381e-06, |
| "loss": 0.0004, |
| "step": 407 |
| }, |
| { |
| "epoch": 4.163265306122449, |
| "grad_norm": 0.01789182610809803, |
| "learning_rate": 3.7641723356009076e-06, |
| "loss": 0.0006, |
| "step": 408 |
| }, |
| { |
| "epoch": 4.173469387755102, |
| "grad_norm": 0.002530967351049185, |
| "learning_rate": 3.7188208616780046e-06, |
| "loss": 0.0001, |
| "step": 409 |
| }, |
| { |
| "epoch": 4.183673469387755, |
| "grad_norm": 0.0029371839482337236, |
| "learning_rate": 3.6734693877551024e-06, |
| "loss": 0.0002, |
| "step": 410 |
| }, |
| { |
| "epoch": 4.1938775510204085, |
| "grad_norm": 0.004367890767753124, |
| "learning_rate": 3.6281179138322e-06, |
| "loss": 0.0002, |
| "step": 411 |
| }, |
| { |
| "epoch": 4.204081632653061, |
| "grad_norm": 0.0021538427099585533, |
| "learning_rate": 3.5827664399092976e-06, |
| "loss": 0.0001, |
| "step": 412 |
| }, |
| { |
| "epoch": 4.214285714285714, |
| "grad_norm": 0.0021221789065748453, |
| "learning_rate": 3.537414965986395e-06, |
| "loss": 0.0001, |
| "step": 413 |
| }, |
| { |
| "epoch": 4.224489795918367, |
| "grad_norm": 0.0021122246980667114, |
| "learning_rate": 3.492063492063492e-06, |
| "loss": 0.0001, |
| "step": 414 |
| }, |
| { |
| "epoch": 4.23469387755102, |
| "grad_norm": 0.0025512792635709047, |
| "learning_rate": 3.44671201814059e-06, |
| "loss": 0.0001, |
| "step": 415 |
| }, |
| { |
| "epoch": 4.244897959183674, |
| "grad_norm": 0.0041538686491549015, |
| "learning_rate": 3.4013605442176872e-06, |
| "loss": 0.0003, |
| "step": 416 |
| }, |
| { |
| "epoch": 4.255102040816326, |
| "grad_norm": 0.0017510091420263052, |
| "learning_rate": 3.356009070294785e-06, |
| "loss": 0.0001, |
| "step": 417 |
| }, |
| { |
| "epoch": 4.26530612244898, |
| "grad_norm": 0.0024814323987811804, |
| "learning_rate": 3.3106575963718824e-06, |
| "loss": 0.0002, |
| "step": 418 |
| }, |
| { |
| "epoch": 4.275510204081632, |
| "grad_norm": 0.00235186074860394, |
| "learning_rate": 3.2653061224489794e-06, |
| "loss": 0.0001, |
| "step": 419 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 0.0030012091156095266, |
| "learning_rate": 3.2199546485260772e-06, |
| "loss": 0.0002, |
| "step": 420 |
| }, |
| { |
| "epoch": 4.295918367346939, |
| "grad_norm": 0.004921985324472189, |
| "learning_rate": 3.1746031746031746e-06, |
| "loss": 0.0003, |
| "step": 421 |
| }, |
| { |
| "epoch": 4.3061224489795915, |
| "grad_norm": 0.009844960644841194, |
| "learning_rate": 3.1292517006802725e-06, |
| "loss": 0.0004, |
| "step": 422 |
| }, |
| { |
| "epoch": 4.316326530612245, |
| "grad_norm": 0.003105542156845331, |
| "learning_rate": 3.08390022675737e-06, |
| "loss": 0.0002, |
| "step": 423 |
| }, |
| { |
| "epoch": 4.326530612244898, |
| "grad_norm": 0.005888419691473246, |
| "learning_rate": 3.0385487528344677e-06, |
| "loss": 0.0003, |
| "step": 424 |
| }, |
| { |
| "epoch": 4.336734693877551, |
| "grad_norm": 0.002076453994959593, |
| "learning_rate": 2.993197278911565e-06, |
| "loss": 0.0001, |
| "step": 425 |
| }, |
| { |
| "epoch": 4.346938775510204, |
| "grad_norm": 0.0016607132274657488, |
| "learning_rate": 2.947845804988662e-06, |
| "loss": 0.0001, |
| "step": 426 |
| }, |
| { |
| "epoch": 4.357142857142857, |
| "grad_norm": 0.0024275570176541805, |
| "learning_rate": 2.90249433106576e-06, |
| "loss": 0.0002, |
| "step": 427 |
| }, |
| { |
| "epoch": 4.36734693877551, |
| "grad_norm": 0.003902298165485263, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 0.0002, |
| "step": 428 |
| }, |
| { |
| "epoch": 4.377551020408164, |
| "grad_norm": 0.0023378883488476276, |
| "learning_rate": 2.811791383219955e-06, |
| "loss": 0.0002, |
| "step": 429 |
| }, |
| { |
| "epoch": 4.387755102040816, |
| "grad_norm": 0.0051103937439620495, |
| "learning_rate": 2.7664399092970525e-06, |
| "loss": 0.0002, |
| "step": 430 |
| }, |
| { |
| "epoch": 4.3979591836734695, |
| "grad_norm": 0.0026863350067287683, |
| "learning_rate": 2.7210884353741503e-06, |
| "loss": 0.0002, |
| "step": 431 |
| }, |
| { |
| "epoch": 4.408163265306122, |
| "grad_norm": 0.002001287881284952, |
| "learning_rate": 2.6757369614512473e-06, |
| "loss": 0.0001, |
| "step": 432 |
| }, |
| { |
| "epoch": 4.418367346938775, |
| "grad_norm": 0.008789247833192348, |
| "learning_rate": 2.6303854875283447e-06, |
| "loss": 0.0003, |
| "step": 433 |
| }, |
| { |
| "epoch": 4.428571428571429, |
| "grad_norm": 0.004610543139278889, |
| "learning_rate": 2.5850340136054425e-06, |
| "loss": 0.0002, |
| "step": 434 |
| }, |
| { |
| "epoch": 4.438775510204081, |
| "grad_norm": 0.00599480327218771, |
| "learning_rate": 2.53968253968254e-06, |
| "loss": 0.0003, |
| "step": 435 |
| }, |
| { |
| "epoch": 4.448979591836735, |
| "grad_norm": 0.00554778054356575, |
| "learning_rate": 2.4943310657596373e-06, |
| "loss": 0.0003, |
| "step": 436 |
| }, |
| { |
| "epoch": 4.459183673469388, |
| "grad_norm": 0.00669802725315094, |
| "learning_rate": 2.4489795918367347e-06, |
| "loss": 0.0003, |
| "step": 437 |
| }, |
| { |
| "epoch": 4.469387755102041, |
| "grad_norm": 0.0016791113885119557, |
| "learning_rate": 2.4036281179138325e-06, |
| "loss": 0.0001, |
| "step": 438 |
| }, |
| { |
| "epoch": 4.479591836734694, |
| "grad_norm": 0.0024550866801291704, |
| "learning_rate": 2.35827664399093e-06, |
| "loss": 0.0002, |
| "step": 439 |
| }, |
| { |
| "epoch": 4.489795918367347, |
| "grad_norm": 0.0024079831782728434, |
| "learning_rate": 2.3129251700680273e-06, |
| "loss": 0.0002, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.0028663375414907932, |
| "learning_rate": 2.267573696145125e-06, |
| "loss": 0.0002, |
| "step": 441 |
| }, |
| { |
| "epoch": 4.510204081632653, |
| "grad_norm": 0.008847690187394619, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.0003, |
| "step": 442 |
| }, |
| { |
| "epoch": 4.520408163265306, |
| "grad_norm": 0.005266358610242605, |
| "learning_rate": 2.17687074829932e-06, |
| "loss": 0.0003, |
| "step": 443 |
| }, |
| { |
| "epoch": 4.530612244897959, |
| "grad_norm": 0.0024195676669478416, |
| "learning_rate": 2.1315192743764173e-06, |
| "loss": 0.0002, |
| "step": 444 |
| }, |
| { |
| "epoch": 4.540816326530612, |
| "grad_norm": 0.00421124929562211, |
| "learning_rate": 2.086167800453515e-06, |
| "loss": 0.0002, |
| "step": 445 |
| }, |
| { |
| "epoch": 4.551020408163265, |
| "grad_norm": 0.0020824300590902567, |
| "learning_rate": 2.0408163265306125e-06, |
| "loss": 0.0001, |
| "step": 446 |
| }, |
| { |
| "epoch": 4.561224489795919, |
| "grad_norm": 0.005051845218986273, |
| "learning_rate": 1.99546485260771e-06, |
| "loss": 0.0002, |
| "step": 447 |
| }, |
| { |
| "epoch": 4.571428571428571, |
| "grad_norm": 0.0022977020125836134, |
| "learning_rate": 1.9501133786848077e-06, |
| "loss": 0.0002, |
| "step": 448 |
| }, |
| { |
| "epoch": 4.581632653061225, |
| "grad_norm": 0.001990046352148056, |
| "learning_rate": 1.904761904761905e-06, |
| "loss": 0.0001, |
| "step": 449 |
| }, |
| { |
| "epoch": 4.591836734693878, |
| "grad_norm": 0.0031708430033177137, |
| "learning_rate": 1.8594104308390023e-06, |
| "loss": 0.0002, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.6020408163265305, |
| "grad_norm": 0.0034788285847753286, |
| "learning_rate": 1.8140589569161e-06, |
| "loss": 0.0002, |
| "step": 451 |
| }, |
| { |
| "epoch": 4.612244897959184, |
| "grad_norm": 0.0018601809861138463, |
| "learning_rate": 1.7687074829931975e-06, |
| "loss": 0.0001, |
| "step": 452 |
| }, |
| { |
| "epoch": 4.622448979591836, |
| "grad_norm": 0.016590220853686333, |
| "learning_rate": 1.723356009070295e-06, |
| "loss": 0.0003, |
| "step": 453 |
| }, |
| { |
| "epoch": 4.63265306122449, |
| "grad_norm": 0.003050972009077668, |
| "learning_rate": 1.6780045351473925e-06, |
| "loss": 0.0002, |
| "step": 454 |
| }, |
| { |
| "epoch": 4.642857142857143, |
| "grad_norm": 0.002196480752900243, |
| "learning_rate": 1.6326530612244897e-06, |
| "loss": 0.0001, |
| "step": 455 |
| }, |
| { |
| "epoch": 4.653061224489796, |
| "grad_norm": 0.0025891121476888657, |
| "learning_rate": 1.5873015873015873e-06, |
| "loss": 0.0001, |
| "step": 456 |
| }, |
| { |
| "epoch": 4.663265306122449, |
| "grad_norm": 0.002245939103886485, |
| "learning_rate": 1.541950113378685e-06, |
| "loss": 0.0001, |
| "step": 457 |
| }, |
| { |
| "epoch": 4.673469387755102, |
| "grad_norm": 0.0021706093102693558, |
| "learning_rate": 1.4965986394557825e-06, |
| "loss": 0.0001, |
| "step": 458 |
| }, |
| { |
| "epoch": 4.683673469387755, |
| "grad_norm": 0.0034395295660942793, |
| "learning_rate": 1.45124716553288e-06, |
| "loss": 0.0002, |
| "step": 459 |
| }, |
| { |
| "epoch": 4.6938775510204085, |
| "grad_norm": 0.0023007583804428577, |
| "learning_rate": 1.4058956916099775e-06, |
| "loss": 0.0001, |
| "step": 460 |
| }, |
| { |
| "epoch": 4.704081632653061, |
| "grad_norm": 0.004494468215852976, |
| "learning_rate": 1.3605442176870751e-06, |
| "loss": 0.0002, |
| "step": 461 |
| }, |
| { |
| "epoch": 4.714285714285714, |
| "grad_norm": 0.0017052018083631992, |
| "learning_rate": 1.3151927437641723e-06, |
| "loss": 0.0001, |
| "step": 462 |
| }, |
| { |
| "epoch": 4.724489795918368, |
| "grad_norm": 0.0016002283664420247, |
| "learning_rate": 1.26984126984127e-06, |
| "loss": 0.0001, |
| "step": 463 |
| }, |
| { |
| "epoch": 4.73469387755102, |
| "grad_norm": 0.0033771556336432695, |
| "learning_rate": 1.2244897959183673e-06, |
| "loss": 0.0002, |
| "step": 464 |
| }, |
| { |
| "epoch": 4.744897959183674, |
| "grad_norm": 0.002580232685431838, |
| "learning_rate": 1.179138321995465e-06, |
| "loss": 0.0002, |
| "step": 465 |
| }, |
| { |
| "epoch": 4.755102040816326, |
| "grad_norm": 0.0020106956362724304, |
| "learning_rate": 1.1337868480725626e-06, |
| "loss": 0.0001, |
| "step": 466 |
| }, |
| { |
| "epoch": 4.76530612244898, |
| "grad_norm": 0.0036402051337063313, |
| "learning_rate": 1.08843537414966e-06, |
| "loss": 0.0002, |
| "step": 467 |
| }, |
| { |
| "epoch": 4.775510204081632, |
| "grad_norm": 0.004601712804287672, |
| "learning_rate": 1.0430839002267576e-06, |
| "loss": 0.0002, |
| "step": 468 |
| }, |
| { |
| "epoch": 4.785714285714286, |
| "grad_norm": 0.002733904868364334, |
| "learning_rate": 9.97732426303855e-07, |
| "loss": 0.0002, |
| "step": 469 |
| }, |
| { |
| "epoch": 4.795918367346939, |
| "grad_norm": 0.0027253010775893927, |
| "learning_rate": 9.523809523809525e-07, |
| "loss": 0.0002, |
| "step": 470 |
| }, |
| { |
| "epoch": 4.8061224489795915, |
| "grad_norm": 0.02664267271757126, |
| "learning_rate": 9.0702947845805e-07, |
| "loss": 0.0007, |
| "step": 471 |
| }, |
| { |
| "epoch": 4.816326530612245, |
| "grad_norm": 0.002349977381527424, |
| "learning_rate": 8.616780045351475e-07, |
| "loss": 0.0002, |
| "step": 472 |
| }, |
| { |
| "epoch": 4.826530612244898, |
| "grad_norm": 0.01735026389360428, |
| "learning_rate": 8.163265306122449e-07, |
| "loss": 0.0006, |
| "step": 473 |
| }, |
| { |
| "epoch": 4.836734693877551, |
| "grad_norm": 0.003476122161373496, |
| "learning_rate": 7.709750566893425e-07, |
| "loss": 0.0002, |
| "step": 474 |
| }, |
| { |
| "epoch": 4.846938775510204, |
| "grad_norm": 0.0016622812254354358, |
| "learning_rate": 7.2562358276644e-07, |
| "loss": 0.0001, |
| "step": 475 |
| }, |
| { |
| "epoch": 4.857142857142857, |
| "grad_norm": 0.0042258999310433865, |
| "learning_rate": 6.802721088435376e-07, |
| "loss": 0.0002, |
| "step": 476 |
| }, |
| { |
| "epoch": 4.86734693877551, |
| "grad_norm": 0.0023899853695183992, |
| "learning_rate": 6.34920634920635e-07, |
| "loss": 0.0001, |
| "step": 477 |
| }, |
| { |
| "epoch": 4.877551020408164, |
| "grad_norm": 0.0037495435681194067, |
| "learning_rate": 5.895691609977325e-07, |
| "loss": 0.0002, |
| "step": 478 |
| }, |
| { |
| "epoch": 4.887755102040816, |
| "grad_norm": 0.00362688978202641, |
| "learning_rate": 5.4421768707483e-07, |
| "loss": 0.0002, |
| "step": 479 |
| }, |
| { |
| "epoch": 4.8979591836734695, |
| "grad_norm": 0.005572126246988773, |
| "learning_rate": 4.988662131519275e-07, |
| "loss": 0.0003, |
| "step": 480 |
| }, |
| { |
| "epoch": 4.908163265306122, |
| "grad_norm": 0.0033278209157288074, |
| "learning_rate": 4.53514739229025e-07, |
| "loss": 0.0002, |
| "step": 481 |
| }, |
| { |
| "epoch": 4.918367346938775, |
| "grad_norm": 0.0017971718916669488, |
| "learning_rate": 4.0816326530612243e-07, |
| "loss": 0.0001, |
| "step": 482 |
| }, |
| { |
| "epoch": 4.928571428571429, |
| "grad_norm": 0.003246983280405402, |
| "learning_rate": 3.6281179138322e-07, |
| "loss": 0.0002, |
| "step": 483 |
| }, |
| { |
| "epoch": 4.938775510204081, |
| "grad_norm": 0.002570765558630228, |
| "learning_rate": 3.174603174603175e-07, |
| "loss": 0.0002, |
| "step": 484 |
| }, |
| { |
| "epoch": 4.948979591836735, |
| "grad_norm": 0.003006896935403347, |
| "learning_rate": 2.72108843537415e-07, |
| "loss": 0.0002, |
| "step": 485 |
| }, |
| { |
| "epoch": 4.959183673469388, |
| "grad_norm": 0.003843962447717786, |
| "learning_rate": 2.267573696145125e-07, |
| "loss": 0.0002, |
| "step": 486 |
| }, |
| { |
| "epoch": 4.969387755102041, |
| "grad_norm": 0.005520265083760023, |
| "learning_rate": 1.8140589569161e-07, |
| "loss": 0.0002, |
| "step": 487 |
| }, |
| { |
| "epoch": 4.979591836734694, |
| "grad_norm": 0.0035644182935357094, |
| "learning_rate": 1.360544217687075e-07, |
| "loss": 0.0002, |
| "step": 488 |
| }, |
| { |
| "epoch": 4.989795918367347, |
| "grad_norm": 0.009897944517433643, |
| "learning_rate": 9.0702947845805e-08, |
| "loss": 0.0004, |
| "step": 489 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.005771194584667683, |
| "learning_rate": 4.53514739229025e-08, |
| "loss": 0.0002, |
| "step": 490 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 490, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|