| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 525, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009523809523809525, | |
| "grad_norm": 7.880720841975985, | |
| "learning_rate": 1.509433962264151e-06, | |
| "loss": 1.5676, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01904761904761905, | |
| "grad_norm": 7.860108844770417, | |
| "learning_rate": 3.018867924528302e-06, | |
| "loss": 1.5671, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.02857142857142857, | |
| "grad_norm": 7.7425094912660395, | |
| "learning_rate": 4.528301886792453e-06, | |
| "loss": 1.5511, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0380952380952381, | |
| "grad_norm": 5.876257335486737, | |
| "learning_rate": 6.037735849056604e-06, | |
| "loss": 1.5083, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.047619047619047616, | |
| "grad_norm": 2.92144085922626, | |
| "learning_rate": 7.5471698113207555e-06, | |
| "loss": 1.4418, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.05714285714285714, | |
| "grad_norm": 2.432882075707435, | |
| "learning_rate": 9.056603773584907e-06, | |
| "loss": 1.4431, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": 6.42341738028925, | |
| "learning_rate": 1.0566037735849058e-05, | |
| "loss": 1.4534, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0761904761904762, | |
| "grad_norm": 6.844420009717874, | |
| "learning_rate": 1.2075471698113209e-05, | |
| "loss": 1.4503, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.08571428571428572, | |
| "grad_norm": 8.72224792197926, | |
| "learning_rate": 1.3584905660377358e-05, | |
| "loss": 1.4895, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 6.221915658880092, | |
| "learning_rate": 1.5094339622641511e-05, | |
| "loss": 1.4558, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.10476190476190476, | |
| "grad_norm": 4.297680995629592, | |
| "learning_rate": 1.6603773584905664e-05, | |
| "loss": 1.4005, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 3.3637103263399886, | |
| "learning_rate": 1.8113207547169813e-05, | |
| "loss": 1.353, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.12380952380952381, | |
| "grad_norm": 2.102279140328028, | |
| "learning_rate": 1.9622641509433963e-05, | |
| "loss": 1.3221, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 2.5449653919034234, | |
| "learning_rate": 2.1132075471698115e-05, | |
| "loss": 1.3029, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 1.6782911213475984, | |
| "learning_rate": 2.2641509433962265e-05, | |
| "loss": 1.304, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.1523809523809524, | |
| "grad_norm": 1.7888774786584605, | |
| "learning_rate": 2.4150943396226418e-05, | |
| "loss": 1.2618, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.1619047619047619, | |
| "grad_norm": 1.5997868512124438, | |
| "learning_rate": 2.5660377358490567e-05, | |
| "loss": 1.2583, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.17142857142857143, | |
| "grad_norm": 1.2377999305796528, | |
| "learning_rate": 2.7169811320754716e-05, | |
| "loss": 1.2703, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.18095238095238095, | |
| "grad_norm": 1.486080611758641, | |
| "learning_rate": 2.867924528301887e-05, | |
| "loss": 1.2392, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 1.27108266101122, | |
| "learning_rate": 3.0188679245283022e-05, | |
| "loss": 1.2377, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.9831199960325674, | |
| "learning_rate": 3.169811320754717e-05, | |
| "loss": 1.2123, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.20952380952380953, | |
| "grad_norm": 1.7763756530914847, | |
| "learning_rate": 3.320754716981133e-05, | |
| "loss": 1.2336, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.21904761904761905, | |
| "grad_norm": 1.3105067418162915, | |
| "learning_rate": 3.471698113207548e-05, | |
| "loss": 1.2172, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 1.639321729355214, | |
| "learning_rate": 3.6226415094339626e-05, | |
| "loss": 1.2173, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.23809523809523808, | |
| "grad_norm": 1.476554104561696, | |
| "learning_rate": 3.7735849056603776e-05, | |
| "loss": 1.201, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.24761904761904763, | |
| "grad_norm": 1.530897069889547, | |
| "learning_rate": 3.9245283018867925e-05, | |
| "loss": 1.2083, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.2571428571428571, | |
| "grad_norm": 1.5322183506473377, | |
| "learning_rate": 4.075471698113208e-05, | |
| "loss": 1.2032, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 1.3706277501006499, | |
| "learning_rate": 4.226415094339623e-05, | |
| "loss": 1.1896, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.2761904761904762, | |
| "grad_norm": 1.675751835429746, | |
| "learning_rate": 4.377358490566038e-05, | |
| "loss": 1.1955, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 1.2446280009546904, | |
| "learning_rate": 4.528301886792453e-05, | |
| "loss": 1.1714, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.29523809523809524, | |
| "grad_norm": 1.232539792154481, | |
| "learning_rate": 4.679245283018868e-05, | |
| "loss": 1.1935, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.3047619047619048, | |
| "grad_norm": 1.1894442744080997, | |
| "learning_rate": 4.8301886792452835e-05, | |
| "loss": 1.1823, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.3142857142857143, | |
| "grad_norm": 1.6698359989666955, | |
| "learning_rate": 4.9811320754716985e-05, | |
| "loss": 1.1844, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.3238095238095238, | |
| "grad_norm": 1.1584686212441244, | |
| "learning_rate": 5.1320754716981134e-05, | |
| "loss": 1.168, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 1.7374801678617988, | |
| "learning_rate": 5.283018867924528e-05, | |
| "loss": 1.175, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.34285714285714286, | |
| "grad_norm": 1.3774261987594567, | |
| "learning_rate": 5.433962264150943e-05, | |
| "loss": 1.1885, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.3523809523809524, | |
| "grad_norm": 1.144616111806608, | |
| "learning_rate": 5.584905660377359e-05, | |
| "loss": 1.1609, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.3619047619047619, | |
| "grad_norm": 2.044834450527845, | |
| "learning_rate": 5.735849056603774e-05, | |
| "loss": 1.1627, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.37142857142857144, | |
| "grad_norm": 1.8059910262399932, | |
| "learning_rate": 5.886792452830189e-05, | |
| "loss": 1.1599, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 2.170117071827184, | |
| "learning_rate": 6.0377358490566044e-05, | |
| "loss": 1.1639, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3904761904761905, | |
| "grad_norm": 1.3892171371581836, | |
| "learning_rate": 6.18867924528302e-05, | |
| "loss": 1.1672, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 3.12613489185021, | |
| "learning_rate": 6.339622641509434e-05, | |
| "loss": 1.1621, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.4095238095238095, | |
| "grad_norm": 1.9337153601065566, | |
| "learning_rate": 6.49056603773585e-05, | |
| "loss": 1.1624, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.41904761904761906, | |
| "grad_norm": 2.8416059583439055, | |
| "learning_rate": 6.641509433962266e-05, | |
| "loss": 1.1505, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 1.9476637352918562, | |
| "learning_rate": 6.79245283018868e-05, | |
| "loss": 1.1661, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.4380952380952381, | |
| "grad_norm": 2.276534088165202, | |
| "learning_rate": 6.943396226415095e-05, | |
| "loss": 1.1519, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.44761904761904764, | |
| "grad_norm": 1.7741206434308783, | |
| "learning_rate": 7.09433962264151e-05, | |
| "loss": 1.1397, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 1.2590011451410716, | |
| "learning_rate": 7.245283018867925e-05, | |
| "loss": 1.1496, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.4666666666666667, | |
| "grad_norm": 2.2235594751942145, | |
| "learning_rate": 7.396226415094341e-05, | |
| "loss": 1.1646, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 1.3848690153622902, | |
| "learning_rate": 7.547169811320755e-05, | |
| "loss": 1.1446, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.4857142857142857, | |
| "grad_norm": 2.2564098595161375, | |
| "learning_rate": 7.698113207547171e-05, | |
| "loss": 1.1521, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.49523809523809526, | |
| "grad_norm": 2.2598603091896234, | |
| "learning_rate": 7.849056603773585e-05, | |
| "loss": 1.1501, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.5047619047619047, | |
| "grad_norm": 1.9630215060124938, | |
| "learning_rate": 8e-05, | |
| "loss": 1.1558, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.5142857142857142, | |
| "grad_norm": 2.449644231642783, | |
| "learning_rate": 7.99991139787449e-05, | |
| "loss": 1.1478, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.5238095238095238, | |
| "grad_norm": 1.8285447577052554, | |
| "learning_rate": 7.999645595423128e-05, | |
| "loss": 1.1522, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 1.6446782549594272, | |
| "learning_rate": 7.999202604421244e-05, | |
| "loss": 1.1488, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.5428571428571428, | |
| "grad_norm": 1.6532082997276394, | |
| "learning_rate": 7.998582444493811e-05, | |
| "loss": 1.135, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.5523809523809524, | |
| "grad_norm": 1.3217448563312193, | |
| "learning_rate": 7.997785143114573e-05, | |
| "loss": 1.1381, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.5619047619047619, | |
| "grad_norm": 1.5193922578374879, | |
| "learning_rate": 7.996810735604828e-05, | |
| "loss": 1.1488, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 2.032121014977749, | |
| "learning_rate": 7.995659265131865e-05, | |
| "loss": 1.1486, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.580952380952381, | |
| "grad_norm": 1.1859854441122255, | |
| "learning_rate": 7.994330782707048e-05, | |
| "loss": 1.1463, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.5904761904761905, | |
| "grad_norm": 1.8225832846306127, | |
| "learning_rate": 7.992825347183563e-05, | |
| "loss": 1.1422, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 3.1767099132769046, | |
| "learning_rate": 7.991143025253801e-05, | |
| "loss": 1.1281, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.6095238095238096, | |
| "grad_norm": 1.7321438313588222, | |
| "learning_rate": 7.989283891446413e-05, | |
| "loss": 1.1509, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.6190476190476191, | |
| "grad_norm": 3.7251686905910355, | |
| "learning_rate": 7.987248028123001e-05, | |
| "loss": 1.161, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.6285714285714286, | |
| "grad_norm": 2.9492630393156514, | |
| "learning_rate": 7.985035525474475e-05, | |
| "loss": 1.1583, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.638095238095238, | |
| "grad_norm": 2.9426097334554897, | |
| "learning_rate": 7.982646481517054e-05, | |
| "loss": 1.1442, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.6476190476190476, | |
| "grad_norm": 2.469295775843566, | |
| "learning_rate": 7.980081002087923e-05, | |
| "loss": 1.1401, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.6571428571428571, | |
| "grad_norm": 2.652729866210755, | |
| "learning_rate": 7.97733920084055e-05, | |
| "loss": 1.1488, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.2477356726812125, | |
| "learning_rate": 7.97442119923964e-05, | |
| "loss": 1.1241, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.6761904761904762, | |
| "grad_norm": 2.071700619970133, | |
| "learning_rate": 7.971327126555767e-05, | |
| "loss": 1.1383, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "grad_norm": 1.9746783226121274, | |
| "learning_rate": 7.968057119859639e-05, | |
| "loss": 1.132, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.6952380952380952, | |
| "grad_norm": 1.5945211845534675, | |
| "learning_rate": 7.96461132401603e-05, | |
| "loss": 1.1326, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.7047619047619048, | |
| "grad_norm": 1.9842082262603224, | |
| "learning_rate": 7.960989891677354e-05, | |
| "loss": 1.131, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 1.0744704158928022, | |
| "learning_rate": 7.957192983276915e-05, | |
| "loss": 1.1286, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.7238095238095238, | |
| "grad_norm": 2.4648933927530954, | |
| "learning_rate": 7.953220767021789e-05, | |
| "loss": 1.1334, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.7333333333333333, | |
| "grad_norm": 1.6229327724824014, | |
| "learning_rate": 7.949073418885378e-05, | |
| "loss": 1.1268, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.7428571428571429, | |
| "grad_norm": 2.070091284758308, | |
| "learning_rate": 7.944751122599613e-05, | |
| "loss": 1.13, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.7523809523809524, | |
| "grad_norm": 1.5442744548822007, | |
| "learning_rate": 7.940254069646813e-05, | |
| "loss": 1.1219, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 1.737018571216328, | |
| "learning_rate": 7.935582459251202e-05, | |
| "loss": 1.1219, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.7714285714285715, | |
| "grad_norm": 1.5613075146451556, | |
| "learning_rate": 7.930736498370085e-05, | |
| "loss": 1.1096, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.780952380952381, | |
| "grad_norm": 1.0920845324913702, | |
| "learning_rate": 7.925716401684678e-05, | |
| "loss": 1.119, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.7904761904761904, | |
| "grad_norm": 1.9034104210285714, | |
| "learning_rate": 7.920522391590604e-05, | |
| "loss": 1.1236, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.345035723453052, | |
| "learning_rate": 7.915154698188027e-05, | |
| "loss": 1.1324, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.8095238095238095, | |
| "grad_norm": 1.2875694611881727, | |
| "learning_rate": 7.909613559271467e-05, | |
| "loss": 1.1136, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.819047619047619, | |
| "grad_norm": 1.0224645041826803, | |
| "learning_rate": 7.90389922031927e-05, | |
| "loss": 1.1226, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.8285714285714286, | |
| "grad_norm": 1.3257820885017062, | |
| "learning_rate": 7.898011934482725e-05, | |
| "loss": 1.1214, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.8380952380952381, | |
| "grad_norm": 1.839693710231441, | |
| "learning_rate": 7.89195196257485e-05, | |
| "loss": 1.1406, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.8476190476190476, | |
| "grad_norm": 1.2987193541355626, | |
| "learning_rate": 7.88571957305884e-05, | |
| "loss": 1.1231, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 1.4098694982637217, | |
| "learning_rate": 7.879315042036176e-05, | |
| "loss": 1.1386, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.8666666666666667, | |
| "grad_norm": 1.6007046498489645, | |
| "learning_rate": 7.872738653234387e-05, | |
| "loss": 1.1108, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.8761904761904762, | |
| "grad_norm": 1.0544725776613069, | |
| "learning_rate": 7.865990697994488e-05, | |
| "loss": 1.1285, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.8857142857142857, | |
| "grad_norm": 1.0388252500166617, | |
| "learning_rate": 7.859071475258065e-05, | |
| "loss": 1.1265, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.8952380952380953, | |
| "grad_norm": 1.3249551090301468, | |
| "learning_rate": 7.85198129155404e-05, | |
| "loss": 1.1229, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.9047619047619048, | |
| "grad_norm": 1.2506636003424314, | |
| "learning_rate": 7.844720460985086e-05, | |
| "loss": 1.1284, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 1.1709974083254924, | |
| "learning_rate": 7.837289305213715e-05, | |
| "loss": 1.1221, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.9238095238095239, | |
| "grad_norm": 1.5298739597522653, | |
| "learning_rate": 7.829688153448022e-05, | |
| "loss": 1.122, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 1.0400221166560895, | |
| "learning_rate": 7.821917342427112e-05, | |
| "loss": 1.1125, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.9428571428571428, | |
| "grad_norm": 1.1449775808812492, | |
| "learning_rate": 7.81397721640617e-05, | |
| "loss": 1.1098, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 1.09547909217965, | |
| "learning_rate": 7.805868127141217e-05, | |
| "loss": 1.1097, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.9619047619047619, | |
| "grad_norm": 1.0999337584802662, | |
| "learning_rate": 7.797590433873526e-05, | |
| "loss": 1.1146, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.9714285714285714, | |
| "grad_norm": 1.5977618876155488, | |
| "learning_rate": 7.789144503313704e-05, | |
| "loss": 1.1375, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.9809523809523809, | |
| "grad_norm": 0.8781433045555708, | |
| "learning_rate": 7.780530709625455e-05, | |
| "loss": 1.1191, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.9904761904761905, | |
| "grad_norm": 1.2752214725002813, | |
| "learning_rate": 7.771749434408989e-05, | |
| "loss": 1.1173, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.0844449972573356, | |
| "learning_rate": 7.762801066684133e-05, | |
| "loss": 1.1137, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.0095238095238095, | |
| "grad_norm": 1.082120963718663, | |
| "learning_rate": 7.753686002873087e-05, | |
| "loss": 1.0737, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.019047619047619, | |
| "grad_norm": 1.1974759867644165, | |
| "learning_rate": 7.744404646782866e-05, | |
| "loss": 1.0849, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.0285714285714285, | |
| "grad_norm": 1.6630439720081593, | |
| "learning_rate": 7.734957409587404e-05, | |
| "loss": 1.0861, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.0380952380952382, | |
| "grad_norm": 0.8074012816288818, | |
| "learning_rate": 7.725344709809355e-05, | |
| "loss": 1.0785, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.0476190476190477, | |
| "grad_norm": 1.6368997832238315, | |
| "learning_rate": 7.715566973301529e-05, | |
| "loss": 1.0963, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.0571428571428572, | |
| "grad_norm": 1.297969917454931, | |
| "learning_rate": 7.70562463322805e-05, | |
| "loss": 1.0728, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "grad_norm": 1.4036049691933277, | |
| "learning_rate": 7.695518130045147e-05, | |
| "loss": 1.0806, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.0761904761904761, | |
| "grad_norm": 0.9013266727291808, | |
| "learning_rate": 7.685247911481652e-05, | |
| "loss": 1.0981, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 1.0857142857142856, | |
| "grad_norm": 1.0917190640539323, | |
| "learning_rate": 7.674814432519163e-05, | |
| "loss": 1.0734, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 1.0952380952380953, | |
| "grad_norm": 1.0218269030800684, | |
| "learning_rate": 7.664218155371884e-05, | |
| "loss": 1.0658, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.1047619047619048, | |
| "grad_norm": 0.9481079847045792, | |
| "learning_rate": 7.653459549466157e-05, | |
| "loss": 1.086, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.1142857142857143, | |
| "grad_norm": 1.1795366957840878, | |
| "learning_rate": 7.642539091419654e-05, | |
| "loss": 1.1062, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 1.1238095238095238, | |
| "grad_norm": 1.2913118926572955, | |
| "learning_rate": 7.631457265020274e-05, | |
| "loss": 1.075, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 1.1333333333333333, | |
| "grad_norm": 0.8478614709821788, | |
| "learning_rate": 7.620214561204704e-05, | |
| "loss": 1.0683, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 1.0862987356843379, | |
| "learning_rate": 7.608811478036671e-05, | |
| "loss": 1.0846, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.1523809523809523, | |
| "grad_norm": 1.090222081314668, | |
| "learning_rate": 7.597248520684878e-05, | |
| "loss": 1.0905, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 1.161904761904762, | |
| "grad_norm": 0.7532978409995891, | |
| "learning_rate": 7.585526201400623e-05, | |
| "loss": 1.0791, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.1714285714285715, | |
| "grad_norm": 1.1901023584369215, | |
| "learning_rate": 7.57364503949511e-05, | |
| "loss": 1.0869, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 1.180952380952381, | |
| "grad_norm": 1.2655635735771573, | |
| "learning_rate": 7.56160556131644e-05, | |
| "loss": 1.0688, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 1.1904761904761905, | |
| "grad_norm": 0.8393572943728383, | |
| "learning_rate": 7.549408300226287e-05, | |
| "loss": 1.0798, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 1.0763767921818734, | |
| "learning_rate": 7.537053796576282e-05, | |
| "loss": 1.0755, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 1.2095238095238094, | |
| "grad_norm": 0.810181087854412, | |
| "learning_rate": 7.524542597684066e-05, | |
| "loss": 1.0722, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 1.2190476190476192, | |
| "grad_norm": 0.7248802686574851, | |
| "learning_rate": 7.51187525780905e-05, | |
| "loss": 1.0718, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.2285714285714286, | |
| "grad_norm": 1.3361324797449061, | |
| "learning_rate": 7.499052338127845e-05, | |
| "loss": 1.0778, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 1.2380952380952381, | |
| "grad_norm": 0.8378010550345113, | |
| "learning_rate": 7.486074406709429e-05, | |
| "loss": 1.068, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.2476190476190476, | |
| "grad_norm": 0.703515432967172, | |
| "learning_rate": 7.47294203848995e-05, | |
| "loss": 1.0832, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 1.2571428571428571, | |
| "grad_norm": 0.5700766848892007, | |
| "learning_rate": 7.459655815247278e-05, | |
| "loss": 1.0702, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.2666666666666666, | |
| "grad_norm": 0.7201933038596476, | |
| "learning_rate": 7.446216325575225e-05, | |
| "loss": 1.0751, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 1.276190476190476, | |
| "grad_norm": 0.9463247169207133, | |
| "learning_rate": 7.432624164857465e-05, | |
| "loss": 1.0597, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.2857142857142856, | |
| "grad_norm": 1.0411988380085484, | |
| "learning_rate": 7.418879935241162e-05, | |
| "loss": 1.0694, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.2952380952380953, | |
| "grad_norm": 1.0701186830070861, | |
| "learning_rate": 7.404984245610296e-05, | |
| "loss": 1.0706, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.3047619047619048, | |
| "grad_norm": 1.1554699877331098, | |
| "learning_rate": 7.390937711558683e-05, | |
| "loss": 1.0619, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 1.3142857142857143, | |
| "grad_norm": 1.1704855402151975, | |
| "learning_rate": 7.376740955362715e-05, | |
| "loss": 1.0535, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.3238095238095238, | |
| "grad_norm": 1.1741068413982254, | |
| "learning_rate": 7.362394605953773e-05, | |
| "loss": 1.0775, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 1.2007489108920013, | |
| "learning_rate": 7.347899298890386e-05, | |
| "loss": 1.0726, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.342857142857143, | |
| "grad_norm": 0.9964002243232726, | |
| "learning_rate": 7.33325567633006e-05, | |
| "loss": 1.0602, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 1.3523809523809525, | |
| "grad_norm": 0.8355502476587309, | |
| "learning_rate": 7.31846438700084e-05, | |
| "loss": 1.0774, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.361904761904762, | |
| "grad_norm": 0.7029298880866707, | |
| "learning_rate": 7.303526086172558e-05, | |
| "loss": 1.0846, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 1.3714285714285714, | |
| "grad_norm": 0.9269565547406854, | |
| "learning_rate": 7.288441435627821e-05, | |
| "loss": 1.0627, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.380952380952381, | |
| "grad_norm": 1.0950185791329867, | |
| "learning_rate": 7.273211103632676e-05, | |
| "loss": 1.0841, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.3904761904761904, | |
| "grad_norm": 1.0029086769564732, | |
| "learning_rate": 7.25783576490702e-05, | |
| "loss": 1.0559, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.0116599010130591, | |
| "learning_rate": 7.242316100594696e-05, | |
| "loss": 1.063, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 1.4095238095238094, | |
| "grad_norm": 0.8363130430044641, | |
| "learning_rate": 7.226652798233327e-05, | |
| "loss": 1.0601, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.4190476190476191, | |
| "grad_norm": 0.7749911266241076, | |
| "learning_rate": 7.210846551723855e-05, | |
| "loss": 1.0655, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.8621768850747066, | |
| "learning_rate": 7.194898061299798e-05, | |
| "loss": 1.0687, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.438095238095238, | |
| "grad_norm": 1.0632400314141561, | |
| "learning_rate": 7.17880803349623e-05, | |
| "loss": 1.0594, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 1.4476190476190476, | |
| "grad_norm": 1.384452095634303, | |
| "learning_rate": 7.162577181118485e-05, | |
| "loss": 1.0747, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.457142857142857, | |
| "grad_norm": 0.6834451700354741, | |
| "learning_rate": 7.146206223210571e-05, | |
| "loss": 1.0722, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 1.4666666666666668, | |
| "grad_norm": 0.6642752224995787, | |
| "learning_rate": 7.129695885023321e-05, | |
| "loss": 1.0605, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.4761904761904763, | |
| "grad_norm": 1.2463789389464905, | |
| "learning_rate": 7.113046897982265e-05, | |
| "loss": 1.0678, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.4857142857142858, | |
| "grad_norm": 1.1799339849059132, | |
| "learning_rate": 7.09625999965522e-05, | |
| "loss": 1.0805, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.4952380952380953, | |
| "grad_norm": 0.5844684489642771, | |
| "learning_rate": 7.079335933719625e-05, | |
| "loss": 1.0627, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 1.5047619047619047, | |
| "grad_norm": 0.6983703223969, | |
| "learning_rate": 7.062275449929587e-05, | |
| "loss": 1.0685, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 1.5142857142857142, | |
| "grad_norm": 1.0157049520030346, | |
| "learning_rate": 7.045079304082667e-05, | |
| "loss": 1.057, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 1.5238095238095237, | |
| "grad_norm": 1.21522775185842, | |
| "learning_rate": 7.027748257986403e-05, | |
| "loss": 1.0497, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.5333333333333332, | |
| "grad_norm": 0.670913402144578, | |
| "learning_rate": 7.010283079424553e-05, | |
| "loss": 1.0714, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.5428571428571427, | |
| "grad_norm": 0.5725919893988906, | |
| "learning_rate": 6.992684542123094e-05, | |
| "loss": 1.0651, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.5523809523809524, | |
| "grad_norm": 0.5816949338841475, | |
| "learning_rate": 6.974953425715926e-05, | |
| "loss": 1.0625, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.561904761904762, | |
| "grad_norm": 0.621239138517028, | |
| "learning_rate": 6.957090515710353e-05, | |
| "loss": 1.06, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.5714285714285714, | |
| "grad_norm": 0.8232854555952457, | |
| "learning_rate": 6.939096603452269e-05, | |
| "loss": 1.0607, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.580952380952381, | |
| "grad_norm": 0.7980171860515516, | |
| "learning_rate": 6.920972486091113e-05, | |
| "loss": 1.069, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.5904761904761906, | |
| "grad_norm": 0.8469950727776042, | |
| "learning_rate": 6.902718966544545e-05, | |
| "loss": 1.0555, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.0917122939746875, | |
| "learning_rate": 6.884336853462879e-05, | |
| "loss": 1.073, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.6095238095238096, | |
| "grad_norm": 0.92965912537962, | |
| "learning_rate": 6.865826961193261e-05, | |
| "loss": 1.0662, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.619047619047619, | |
| "grad_norm": 0.7708978762494281, | |
| "learning_rate": 6.84719010974359e-05, | |
| "loss": 1.0524, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.6285714285714286, | |
| "grad_norm": 0.6756433297794424, | |
| "learning_rate": 6.828427124746191e-05, | |
| "loss": 1.0692, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.638095238095238, | |
| "grad_norm": 0.6747029343891224, | |
| "learning_rate": 6.80953883742124e-05, | |
| "loss": 1.0607, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.6476190476190475, | |
| "grad_norm": 0.6109245622680719, | |
| "learning_rate": 6.790526084539939e-05, | |
| "loss": 1.0578, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.657142857142857, | |
| "grad_norm": 0.7524343677177509, | |
| "learning_rate": 6.771389708387448e-05, | |
| "loss": 1.0666, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 1.1491906497079498, | |
| "learning_rate": 6.752130556725567e-05, | |
| "loss": 1.0694, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.6761904761904762, | |
| "grad_norm": 0.8088967378835389, | |
| "learning_rate": 6.73274948275518e-05, | |
| "loss": 1.0632, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.6857142857142857, | |
| "grad_norm": 0.4254639452827212, | |
| "learning_rate": 6.713247345078465e-05, | |
| "loss": 1.0543, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.6952380952380952, | |
| "grad_norm": 0.5687155529327818, | |
| "learning_rate": 6.693625007660845e-05, | |
| "loss": 1.0575, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.704761904761905, | |
| "grad_norm": 0.6667850106047994, | |
| "learning_rate": 6.673883339792723e-05, | |
| "loss": 1.055, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.612985384932019, | |
| "learning_rate": 6.654023216050963e-05, | |
| "loss": 1.071, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.723809523809524, | |
| "grad_norm": 0.5640012274326677, | |
| "learning_rate": 6.634045516260156e-05, | |
| "loss": 1.051, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.7333333333333334, | |
| "grad_norm": 0.499428294662914, | |
| "learning_rate": 6.613951125453632e-05, | |
| "loss": 1.0768, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.7428571428571429, | |
| "grad_norm": 0.42771581794966373, | |
| "learning_rate": 6.593740933834262e-05, | |
| "loss": 1.0566, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.7523809523809524, | |
| "grad_norm": 0.8182121479285371, | |
| "learning_rate": 6.573415836735011e-05, | |
| "loss": 1.073, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.7619047619047619, | |
| "grad_norm": 0.45335952004384406, | |
| "learning_rate": 6.552976734579281e-05, | |
| "loss": 1.0602, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.7714285714285714, | |
| "grad_norm": 0.47019401062933164, | |
| "learning_rate": 6.53242453284102e-05, | |
| "loss": 1.0592, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.7809523809523808, | |
| "grad_norm": 0.4216198182174328, | |
| "learning_rate": 6.511760142004608e-05, | |
| "loss": 1.0607, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.7904761904761903, | |
| "grad_norm": 0.4390063285487437, | |
| "learning_rate": 6.49098447752452e-05, | |
| "loss": 1.0685, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.44096942987470333, | |
| "learning_rate": 6.470098459784768e-05, | |
| "loss": 1.0566, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.8095238095238095, | |
| "grad_norm": 0.3569409404531036, | |
| "learning_rate": 6.449103014058139e-05, | |
| "loss": 1.0588, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.819047619047619, | |
| "grad_norm": 0.47305630507582075, | |
| "learning_rate": 6.427999070465191e-05, | |
| "loss": 1.0758, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.8285714285714287, | |
| "grad_norm": 0.34682771443136134, | |
| "learning_rate": 6.406787563933053e-05, | |
| "loss": 1.0561, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.8380952380952382, | |
| "grad_norm": 0.3784058265103603, | |
| "learning_rate": 6.385469434154006e-05, | |
| "loss": 1.0806, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.8476190476190477, | |
| "grad_norm": 0.45193884219920866, | |
| "learning_rate": 6.364045625543856e-05, | |
| "loss": 1.0601, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.8571428571428572, | |
| "grad_norm": 0.5301378494163913, | |
| "learning_rate": 6.342517087200094e-05, | |
| "loss": 1.0586, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "grad_norm": 0.563802211092738, | |
| "learning_rate": 6.320884772859845e-05, | |
| "loss": 1.0571, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.8761904761904762, | |
| "grad_norm": 0.5201827508182623, | |
| "learning_rate": 6.29914964085763e-05, | |
| "loss": 1.0571, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.8857142857142857, | |
| "grad_norm": 0.4236836095612993, | |
| "learning_rate": 6.277312654082886e-05, | |
| "loss": 1.0606, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.8952380952380952, | |
| "grad_norm": 0.38720748352240386, | |
| "learning_rate": 6.255374779937344e-05, | |
| "loss": 1.0493, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.9047619047619047, | |
| "grad_norm": 0.42659615830981307, | |
| "learning_rate": 6.23333699029214e-05, | |
| "loss": 1.0619, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.9142857142857141, | |
| "grad_norm": 0.6061421220455705, | |
| "learning_rate": 6.211200261444774e-05, | |
| "loss": 1.0541, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.9238095238095239, | |
| "grad_norm": 0.797781403199275, | |
| "learning_rate": 6.188965574075863e-05, | |
| "loss": 1.0559, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.9333333333333333, | |
| "grad_norm": 0.8282451159457771, | |
| "learning_rate": 6.166633913205684e-05, | |
| "loss": 1.0545, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.9428571428571428, | |
| "grad_norm": 0.9009492897590493, | |
| "learning_rate": 6.144206268150549e-05, | |
| "loss": 1.0586, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.9523809523809523, | |
| "grad_norm": 1.0020870163569926, | |
| "learning_rate": 6.12168363247897e-05, | |
| "loss": 1.0457, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.961904761904762, | |
| "grad_norm": 1.1260023046138727, | |
| "learning_rate": 6.0990670039676416e-05, | |
| "loss": 1.054, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.9714285714285715, | |
| "grad_norm": 0.7820493480578619, | |
| "learning_rate": 6.0763573845572434e-05, | |
| "loss": 1.0674, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.980952380952381, | |
| "grad_norm": 0.47269444067540217, | |
| "learning_rate": 6.053555780308049e-05, | |
| "loss": 1.0469, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.9904761904761905, | |
| "grad_norm": 0.30952011756676745, | |
| "learning_rate": 6.03066320135536e-05, | |
| "loss": 1.0457, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5189001553144519, | |
| "learning_rate": 6.0076806618647545e-05, | |
| "loss": 1.0549, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.0095238095238095, | |
| "grad_norm": 0.7439078466919193, | |
| "learning_rate": 5.984609179987155e-05, | |
| "loss": 1.0304, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 2.019047619047619, | |
| "grad_norm": 0.8444989260384058, | |
| "learning_rate": 5.961449777813727e-05, | |
| "loss": 1.0483, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 2.0285714285714285, | |
| "grad_norm": 0.85105512066596, | |
| "learning_rate": 5.9382034813306014e-05, | |
| "loss": 1.0192, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 2.038095238095238, | |
| "grad_norm": 0.8717088777508358, | |
| "learning_rate": 5.914871320373417e-05, | |
| "loss": 1.0262, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 2.0476190476190474, | |
| "grad_norm": 0.8473077460098071, | |
| "learning_rate": 5.891454328581702e-05, | |
| "loss": 1.0354, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 2.057142857142857, | |
| "grad_norm": 0.7459552028603442, | |
| "learning_rate": 5.8679535433530756e-05, | |
| "loss": 1.0151, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 2.066666666666667, | |
| "grad_norm": 0.6103690018974712, | |
| "learning_rate": 5.844370005797304e-05, | |
| "loss": 1.0169, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 2.0761904761904764, | |
| "grad_norm": 0.4922703155453895, | |
| "learning_rate": 5.820704760690161e-05, | |
| "loss": 1.0189, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 2.085714285714286, | |
| "grad_norm": 0.5537849831993843, | |
| "learning_rate": 5.796958856427155e-05, | |
| "loss": 1.0177, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 2.0952380952380953, | |
| "grad_norm": 0.5404302742305147, | |
| "learning_rate": 5.7731333449770833e-05, | |
| "loss": 1.0095, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.104761904761905, | |
| "grad_norm": 0.4534851743612291, | |
| "learning_rate": 5.7492292818354224e-05, | |
| "loss": 1.0187, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 2.1142857142857143, | |
| "grad_norm": 0.3065835944952437, | |
| "learning_rate": 5.725247725977573e-05, | |
| "loss": 1.0034, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 2.123809523809524, | |
| "grad_norm": 0.4746500125323341, | |
| "learning_rate": 5.7011897398119486e-05, | |
| "loss": 1.0156, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 2.1333333333333333, | |
| "grad_norm": 0.5364964068164462, | |
| "learning_rate": 5.6770563891329e-05, | |
| "loss": 1.023, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 2.142857142857143, | |
| "grad_norm": 0.39435975272411355, | |
| "learning_rate": 5.652848743073513e-05, | |
| "loss": 1.0161, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.1523809523809523, | |
| "grad_norm": 0.2917840449905482, | |
| "learning_rate": 5.628567874058235e-05, | |
| "loss": 1.0176, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 2.1619047619047618, | |
| "grad_norm": 0.4309558938275066, | |
| "learning_rate": 5.6042148577553665e-05, | |
| "loss": 1.0189, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 2.1714285714285713, | |
| "grad_norm": 0.4049822234252623, | |
| "learning_rate": 5.5797907730294123e-05, | |
| "loss": 1.0079, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 2.1809523809523808, | |
| "grad_norm": 0.2826706608436985, | |
| "learning_rate": 5.555296701893284e-05, | |
| "loss": 1.0161, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 0.4502126269593606, | |
| "learning_rate": 5.5307337294603595e-05, | |
| "loss": 1.0196, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.41114563221017636, | |
| "learning_rate": 5.506102943896426e-05, | |
| "loss": 1.016, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 2.2095238095238097, | |
| "grad_norm": 0.313503216833648, | |
| "learning_rate": 5.481405436371459e-05, | |
| "loss": 1.0089, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 2.219047619047619, | |
| "grad_norm": 0.36100525990234567, | |
| "learning_rate": 5.45664230101129e-05, | |
| "loss": 1.0031, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 2.2285714285714286, | |
| "grad_norm": 0.3757564707600635, | |
| "learning_rate": 5.431814634849131e-05, | |
| "loss": 1.0144, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 2.238095238095238, | |
| "grad_norm": 0.3578831855885671, | |
| "learning_rate": 5.40692353777698e-05, | |
| "loss": 1.0291, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 2.2476190476190476, | |
| "grad_norm": 0.279381430603173, | |
| "learning_rate": 5.38197011249689e-05, | |
| "loss": 1.0038, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 2.257142857142857, | |
| "grad_norm": 0.31128636254168296, | |
| "learning_rate": 5.356955464472121e-05, | |
| "loss": 1.0132, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 2.2666666666666666, | |
| "grad_norm": 0.28395461762897617, | |
| "learning_rate": 5.331880701878165e-05, | |
| "loss": 1.0116, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 2.276190476190476, | |
| "grad_norm": 0.27391953654101664, | |
| "learning_rate": 5.3067469355536525e-05, | |
| "loss": 1.0183, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.22816906449119856, | |
| "learning_rate": 5.2815552789511426e-05, | |
| "loss": 1.0031, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.295238095238095, | |
| "grad_norm": 0.2370061459930583, | |
| "learning_rate": 5.256306848087796e-05, | |
| "loss": 1.0243, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 2.3047619047619046, | |
| "grad_norm": 0.2679568465187226, | |
| "learning_rate": 5.2310027614959316e-05, | |
| "loss": 1.003, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 2.314285714285714, | |
| "grad_norm": 0.26159769690103984, | |
| "learning_rate": 5.20564414017348e-05, | |
| "loss": 1.0286, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 2.323809523809524, | |
| "grad_norm": 0.2277499325426611, | |
| "learning_rate": 5.1802321075343135e-05, | |
| "loss": 1.0158, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.2765055373878013, | |
| "learning_rate": 5.1547677893584846e-05, | |
| "loss": 1.0176, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 2.342857142857143, | |
| "grad_norm": 0.2707671528392851, | |
| "learning_rate": 5.129252313742353e-05, | |
| "loss": 1.012, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 2.3523809523809525, | |
| "grad_norm": 0.24663987368382112, | |
| "learning_rate": 5.103686811048603e-05, | |
| "loss": 1.005, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 2.361904761904762, | |
| "grad_norm": 0.3119601934853684, | |
| "learning_rate": 5.078072413856174e-05, | |
| "loss": 0.9982, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 2.3714285714285714, | |
| "grad_norm": 0.28361246368044496, | |
| "learning_rate": 5.052410256910085e-05, | |
| "loss": 1.0106, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 0.210984716029959, | |
| "learning_rate": 5.026701477071161e-05, | |
| "loss": 1.0249, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.3904761904761904, | |
| "grad_norm": 0.2639314428580076, | |
| "learning_rate": 5.00094721326567e-05, | |
| "loss": 1.0133, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.2077340280233697, | |
| "learning_rate": 4.9751486064348695e-05, | |
| "loss": 1.0166, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 2.4095238095238094, | |
| "grad_norm": 0.20906067378182863, | |
| "learning_rate": 4.9493067994844606e-05, | |
| "loss": 1.0017, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 2.419047619047619, | |
| "grad_norm": 0.31463242576457345, | |
| "learning_rate": 4.9234229372339525e-05, | |
| "loss": 1.0167, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 2.4285714285714284, | |
| "grad_norm": 0.23613151956098302, | |
| "learning_rate": 4.897498166365953e-05, | |
| "loss": 1.0148, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 2.4380952380952383, | |
| "grad_norm": 0.2809994241715782, | |
| "learning_rate": 4.8715336353753616e-05, | |
| "loss": 1.0168, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 2.447619047619048, | |
| "grad_norm": 0.28140513046492216, | |
| "learning_rate": 4.845530494518498e-05, | |
| "loss": 1.0174, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 2.4571428571428573, | |
| "grad_norm": 0.26072644777277915, | |
| "learning_rate": 4.819489895762135e-05, | |
| "loss": 1.003, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 2.466666666666667, | |
| "grad_norm": 0.24673982217299864, | |
| "learning_rate": 4.7934129927324717e-05, | |
| "loss": 1.0098, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 2.4761904761904763, | |
| "grad_norm": 0.2336437136943171, | |
| "learning_rate": 4.7673009406640264e-05, | |
| "loss": 1.0186, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.4857142857142858, | |
| "grad_norm": 0.29231870944215277, | |
| "learning_rate": 4.741154896348458e-05, | |
| "loss": 1.0068, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 2.4952380952380953, | |
| "grad_norm": 0.2911376621948982, | |
| "learning_rate": 4.714976018083315e-05, | |
| "loss": 1.0224, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 2.5047619047619047, | |
| "grad_norm": 0.3338692874924189, | |
| "learning_rate": 4.6887654656207255e-05, | |
| "loss": 1.0195, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 2.5142857142857142, | |
| "grad_norm": 0.2541097184148386, | |
| "learning_rate": 4.66252440011602e-05, | |
| "loss": 1.003, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 2.5238095238095237, | |
| "grad_norm": 0.24770604612278468, | |
| "learning_rate": 4.6362539840762886e-05, | |
| "loss": 1.0223, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 2.533333333333333, | |
| "grad_norm": 0.2191655449635504, | |
| "learning_rate": 4.60995538130888e-05, | |
| "loss": 1.0145, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 2.5428571428571427, | |
| "grad_norm": 0.2790975433249823, | |
| "learning_rate": 4.5836297568698475e-05, | |
| "loss": 1.0126, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 2.552380952380952, | |
| "grad_norm": 0.28428156686414346, | |
| "learning_rate": 4.557278277012329e-05, | |
| "loss": 1.0295, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 2.5619047619047617, | |
| "grad_norm": 0.3233959922060247, | |
| "learning_rate": 4.5309021091348885e-05, | |
| "loss": 1.0174, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.2956250139389734, | |
| "learning_rate": 4.504502421729795e-05, | |
| "loss": 1.0215, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.580952380952381, | |
| "grad_norm": 0.2009944046883891, | |
| "learning_rate": 4.478080384331255e-05, | |
| "loss": 1.011, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 2.5904761904761906, | |
| "grad_norm": 0.18530962666368314, | |
| "learning_rate": 4.4516371674636074e-05, | |
| "loss": 1.0181, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.19572463370442308, | |
| "learning_rate": 4.425173942589462e-05, | |
| "loss": 1.0222, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 2.6095238095238096, | |
| "grad_norm": 0.20133706322572675, | |
| "learning_rate": 4.398691882057804e-05, | |
| "loss": 1.0238, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 2.619047619047619, | |
| "grad_norm": 0.1696896722933609, | |
| "learning_rate": 4.372192159052058e-05, | |
| "loss": 1.0121, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 2.6285714285714286, | |
| "grad_norm": 0.5599358185228015, | |
| "learning_rate": 4.3456759475381183e-05, | |
| "loss": 1.0468, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 2.638095238095238, | |
| "grad_norm": 0.20466020884323685, | |
| "learning_rate": 4.3191444222123326e-05, | |
| "loss": 1.0211, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 2.6476190476190475, | |
| "grad_norm": 0.18418573847215977, | |
| "learning_rate": 4.2925987584494706e-05, | |
| "loss": 1.013, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 2.657142857142857, | |
| "grad_norm": 0.20944834576237945, | |
| "learning_rate": 4.26604013225065e-05, | |
| "loss": 1.0165, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.17888267966598845, | |
| "learning_rate": 4.239469720191234e-05, | |
| "loss": 1.0285, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.6761904761904765, | |
| "grad_norm": 0.24111408627710695, | |
| "learning_rate": 4.2128886993687145e-05, | |
| "loss": 1.011, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 2.685714285714286, | |
| "grad_norm": 0.25283589971223985, | |
| "learning_rate": 4.186298247350567e-05, | |
| "loss": 1.0021, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 2.6952380952380954, | |
| "grad_norm": 0.19532012129248275, | |
| "learning_rate": 4.159699542122071e-05, | |
| "loss": 1.0113, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 2.704761904761905, | |
| "grad_norm": 0.2297669069644532, | |
| "learning_rate": 4.133093762034137e-05, | |
| "loss": 1.0032, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 2.7142857142857144, | |
| "grad_norm": 0.2465633017960609, | |
| "learning_rate": 4.1064820857511e-05, | |
| "loss": 0.9943, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 2.723809523809524, | |
| "grad_norm": 0.24245129662503742, | |
| "learning_rate": 4.079865692198499e-05, | |
| "loss": 1.0164, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 2.7333333333333334, | |
| "grad_norm": 0.22833439764975844, | |
| "learning_rate": 4.053245760510856e-05, | |
| "loss": 1.0161, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 2.742857142857143, | |
| "grad_norm": 0.22230175915043607, | |
| "learning_rate": 4.026623469979436e-05, | |
| "loss": 1.0091, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 2.7523809523809524, | |
| "grad_norm": 0.2781762656155117, | |
| "learning_rate": 4e-05, | |
| "loss": 1.0017, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 2.761904761904762, | |
| "grad_norm": 0.2864072028925091, | |
| "learning_rate": 3.9733765300205654e-05, | |
| "loss": 1.0075, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.7714285714285714, | |
| "grad_norm": 0.22898372701526665, | |
| "learning_rate": 3.946754239489146e-05, | |
| "loss": 1.0122, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 2.780952380952381, | |
| "grad_norm": 0.21256356082358066, | |
| "learning_rate": 3.9201343078015026e-05, | |
| "loss": 1.008, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 2.7904761904761903, | |
| "grad_norm": 0.2818845650719653, | |
| "learning_rate": 3.8935179142489016e-05, | |
| "loss": 0.9986, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.23141186471770828, | |
| "learning_rate": 3.866906237965865e-05, | |
| "loss": 1.0165, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 2.8095238095238093, | |
| "grad_norm": 0.23784103111468072, | |
| "learning_rate": 3.840300457877931e-05, | |
| "loss": 1.0226, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 2.819047619047619, | |
| "grad_norm": 0.20078316045157482, | |
| "learning_rate": 3.813701752649435e-05, | |
| "loss": 1.0015, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 2.8285714285714287, | |
| "grad_norm": 0.24338393846551132, | |
| "learning_rate": 3.787111300631287e-05, | |
| "loss": 1.0067, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 2.8380952380952382, | |
| "grad_norm": 0.20074428570812403, | |
| "learning_rate": 3.7605302798087686e-05, | |
| "loss": 1.0122, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 2.8476190476190477, | |
| "grad_norm": 0.20714128390839137, | |
| "learning_rate": 3.7339598677493515e-05, | |
| "loss": 1.0035, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.17387463464530994, | |
| "learning_rate": 3.7074012415505294e-05, | |
| "loss": 1.0106, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.8666666666666667, | |
| "grad_norm": 0.19763448671477987, | |
| "learning_rate": 3.6808555777876673e-05, | |
| "loss": 1.0071, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 2.876190476190476, | |
| "grad_norm": 0.193551861844252, | |
| "learning_rate": 3.654324052461883e-05, | |
| "loss": 1.0149, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 2.8857142857142857, | |
| "grad_norm": 0.20465104040557303, | |
| "learning_rate": 3.6278078409479424e-05, | |
| "loss": 1.0132, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 2.895238095238095, | |
| "grad_norm": 0.1858868750629034, | |
| "learning_rate": 3.6013081179421965e-05, | |
| "loss": 1.0113, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 2.9047619047619047, | |
| "grad_norm": 0.19231783181407783, | |
| "learning_rate": 3.5748260574105395e-05, | |
| "loss": 1.0223, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 2.914285714285714, | |
| "grad_norm": 0.20811259594067136, | |
| "learning_rate": 3.548362832536393e-05, | |
| "loss": 1.031, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 2.923809523809524, | |
| "grad_norm": 0.17374784949143135, | |
| "learning_rate": 3.5219196156687454e-05, | |
| "loss": 1.0143, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 2.9333333333333336, | |
| "grad_norm": 0.18941317471635122, | |
| "learning_rate": 3.495497578270206e-05, | |
| "loss": 1.0067, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 2.942857142857143, | |
| "grad_norm": 0.2175969420603483, | |
| "learning_rate": 3.469097890865113e-05, | |
| "loss": 1.0149, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 2.9523809523809526, | |
| "grad_norm": 0.20305721352150802, | |
| "learning_rate": 3.442721722987673e-05, | |
| "loss": 1.0148, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.961904761904762, | |
| "grad_norm": 0.1735412980023502, | |
| "learning_rate": 3.416370243130154e-05, | |
| "loss": 1.0308, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 2.9714285714285715, | |
| "grad_norm": 0.23287418335200405, | |
| "learning_rate": 3.390044618691121e-05, | |
| "loss": 1.0181, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 2.980952380952381, | |
| "grad_norm": 0.1731215573089734, | |
| "learning_rate": 3.363746015923713e-05, | |
| "loss": 1.0141, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 2.9904761904761905, | |
| "grad_norm": 0.36664679839921727, | |
| "learning_rate": 3.337475599883981e-05, | |
| "loss": 1.0252, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.20808906149822945, | |
| "learning_rate": 3.3112345343792765e-05, | |
| "loss": 1.0094, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 3.0095238095238095, | |
| "grad_norm": 0.2386737684696705, | |
| "learning_rate": 3.285023981916687e-05, | |
| "loss": 0.985, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 3.019047619047619, | |
| "grad_norm": 0.21859428661149227, | |
| "learning_rate": 3.2588451036515435e-05, | |
| "loss": 0.9875, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 3.0285714285714285, | |
| "grad_norm": 0.21686946170692808, | |
| "learning_rate": 3.2326990593359756e-05, | |
| "loss": 0.9648, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 3.038095238095238, | |
| "grad_norm": 0.23585519844179503, | |
| "learning_rate": 3.206587007267528e-05, | |
| "loss": 0.9804, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 3.0476190476190474, | |
| "grad_norm": 0.23471049638102, | |
| "learning_rate": 3.1805101042378665e-05, | |
| "loss": 0.9764, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.057142857142857, | |
| "grad_norm": 0.24601337714676638, | |
| "learning_rate": 3.154469505481503e-05, | |
| "loss": 0.9787, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 3.066666666666667, | |
| "grad_norm": 0.24680145517708438, | |
| "learning_rate": 3.128466364624638e-05, | |
| "loss": 0.9751, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 3.0761904761904764, | |
| "grad_norm": 0.20266157663662596, | |
| "learning_rate": 3.1025018336340484e-05, | |
| "loss": 0.9786, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 3.085714285714286, | |
| "grad_norm": 0.23172246726109874, | |
| "learning_rate": 3.076577062766049e-05, | |
| "loss": 0.9797, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 3.0952380952380953, | |
| "grad_norm": 0.20530652182231443, | |
| "learning_rate": 3.0506932005155407e-05, | |
| "loss": 0.9776, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 3.104761904761905, | |
| "grad_norm": 0.19541728346574888, | |
| "learning_rate": 3.024851393565132e-05, | |
| "loss": 0.9747, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 3.1142857142857143, | |
| "grad_norm": 0.24999279824271964, | |
| "learning_rate": 2.999052786734331e-05, | |
| "loss": 0.988, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 3.123809523809524, | |
| "grad_norm": 0.22057462842837305, | |
| "learning_rate": 2.9732985229288397e-05, | |
| "loss": 0.9758, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 3.1333333333333333, | |
| "grad_norm": 0.21976681898897724, | |
| "learning_rate": 2.9475897430899157e-05, | |
| "loss": 0.9778, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 3.142857142857143, | |
| "grad_norm": 0.22563434482551034, | |
| "learning_rate": 2.921927586143827e-05, | |
| "loss": 0.989, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.1523809523809523, | |
| "grad_norm": 0.1951899724158065, | |
| "learning_rate": 2.8963131889513986e-05, | |
| "loss": 0.9723, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 3.1619047619047618, | |
| "grad_norm": 0.31930450117417375, | |
| "learning_rate": 2.870747686257649e-05, | |
| "loss": 0.9743, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 3.1714285714285713, | |
| "grad_norm": 0.24347478004659553, | |
| "learning_rate": 2.845232210641517e-05, | |
| "loss": 0.9658, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 3.1809523809523808, | |
| "grad_norm": 0.22607819816932653, | |
| "learning_rate": 2.8197678924656886e-05, | |
| "loss": 0.9666, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 3.1904761904761907, | |
| "grad_norm": 0.26083022022974084, | |
| "learning_rate": 2.7943558598265218e-05, | |
| "loss": 0.9875, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.20430972542099413, | |
| "learning_rate": 2.7689972385040697e-05, | |
| "loss": 0.9817, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 3.2095238095238097, | |
| "grad_norm": 0.2478055729854012, | |
| "learning_rate": 2.743693151912206e-05, | |
| "loss": 0.9833, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 3.219047619047619, | |
| "grad_norm": 0.20670211431468252, | |
| "learning_rate": 2.718444721048859e-05, | |
| "loss": 0.9768, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 3.2285714285714286, | |
| "grad_norm": 0.258133896429674, | |
| "learning_rate": 2.693253064446348e-05, | |
| "loss": 0.9663, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 3.238095238095238, | |
| "grad_norm": 0.22888650123481227, | |
| "learning_rate": 2.6681192981218348e-05, | |
| "loss": 0.9815, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.2476190476190476, | |
| "grad_norm": 0.2052454770699904, | |
| "learning_rate": 2.6430445355278788e-05, | |
| "loss": 0.9752, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 3.257142857142857, | |
| "grad_norm": 0.2161754437788185, | |
| "learning_rate": 2.6180298875031098e-05, | |
| "loss": 0.9688, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 3.2666666666666666, | |
| "grad_norm": 0.2389481202074638, | |
| "learning_rate": 2.59307646222302e-05, | |
| "loss": 0.9903, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 3.276190476190476, | |
| "grad_norm": 0.1688931879244891, | |
| "learning_rate": 2.5681853651508704e-05, | |
| "loss": 0.9906, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 3.2857142857142856, | |
| "grad_norm": 0.2709186008027029, | |
| "learning_rate": 2.5433576989887115e-05, | |
| "loss": 0.9634, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 3.295238095238095, | |
| "grad_norm": 0.17314495292558174, | |
| "learning_rate": 2.5185945636285416e-05, | |
| "loss": 0.9805, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 3.3047619047619046, | |
| "grad_norm": 0.226865700017327, | |
| "learning_rate": 2.4938970561035753e-05, | |
| "loss": 0.9745, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 3.314285714285714, | |
| "grad_norm": 0.22027328808623828, | |
| "learning_rate": 2.4692662705396412e-05, | |
| "loss": 0.9808, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 3.323809523809524, | |
| "grad_norm": 0.21347101273276106, | |
| "learning_rate": 2.444703298106718e-05, | |
| "loss": 0.9742, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 0.6099297681729771, | |
| "learning_rate": 2.420209226970588e-05, | |
| "loss": 1.0004, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.342857142857143, | |
| "grad_norm": 0.18262102730707186, | |
| "learning_rate": 2.395785142244634e-05, | |
| "loss": 0.9785, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 3.3523809523809525, | |
| "grad_norm": 0.19086404836286397, | |
| "learning_rate": 2.3714321259417662e-05, | |
| "loss": 0.9784, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 3.361904761904762, | |
| "grad_norm": 0.2101963610417534, | |
| "learning_rate": 2.3471512569264884e-05, | |
| "loss": 0.9703, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 3.3714285714285714, | |
| "grad_norm": 0.20089915550579932, | |
| "learning_rate": 2.3229436108671014e-05, | |
| "loss": 0.9753, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 3.380952380952381, | |
| "grad_norm": 0.17710318859904475, | |
| "learning_rate": 2.298810260188054e-05, | |
| "loss": 0.9717, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 3.3904761904761904, | |
| "grad_norm": 0.1505659891122774, | |
| "learning_rate": 2.274752274022428e-05, | |
| "loss": 0.9778, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.18038983533749448, | |
| "learning_rate": 2.250770718164579e-05, | |
| "loss": 0.9854, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 3.4095238095238094, | |
| "grad_norm": 0.18174732799541482, | |
| "learning_rate": 2.2268666550229173e-05, | |
| "loss": 0.9787, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 3.419047619047619, | |
| "grad_norm": 0.15466454580054184, | |
| "learning_rate": 2.203041143572845e-05, | |
| "loss": 0.9771, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 0.17410077565262463, | |
| "learning_rate": 2.1792952393098394e-05, | |
| "loss": 0.9876, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.4380952380952383, | |
| "grad_norm": 0.17696206014651153, | |
| "learning_rate": 2.155629994202696e-05, | |
| "loss": 0.9844, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 3.447619047619048, | |
| "grad_norm": 0.15599328687176803, | |
| "learning_rate": 2.1320464566469233e-05, | |
| "loss": 0.9827, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 3.4571428571428573, | |
| "grad_norm": 0.18924852148069596, | |
| "learning_rate": 2.1085456714183002e-05, | |
| "loss": 0.9737, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 3.466666666666667, | |
| "grad_norm": 0.21383476826326972, | |
| "learning_rate": 2.0851286796265838e-05, | |
| "loss": 0.9817, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 3.4761904761904763, | |
| "grad_norm": 0.17321070429571578, | |
| "learning_rate": 2.0617965186694e-05, | |
| "loss": 0.984, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 3.4857142857142858, | |
| "grad_norm": 0.17184638099703334, | |
| "learning_rate": 2.0385502221862742e-05, | |
| "loss": 0.9878, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 3.4952380952380953, | |
| "grad_norm": 0.17908426969585328, | |
| "learning_rate": 2.015390820012847e-05, | |
| "loss": 0.9842, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 3.5047619047619047, | |
| "grad_norm": 0.1640535311292707, | |
| "learning_rate": 1.9923193381352468e-05, | |
| "loss": 0.9641, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 3.5142857142857142, | |
| "grad_norm": 0.16868052713135487, | |
| "learning_rate": 1.9693367986446415e-05, | |
| "loss": 0.9616, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 3.5238095238095237, | |
| "grad_norm": 0.14316347468811785, | |
| "learning_rate": 1.9464442196919525e-05, | |
| "loss": 0.9854, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.533333333333333, | |
| "grad_norm": 0.13603829907464166, | |
| "learning_rate": 1.9236426154427583e-05, | |
| "loss": 0.9741, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 3.5428571428571427, | |
| "grad_norm": 0.13833016109922058, | |
| "learning_rate": 1.9009329960323594e-05, | |
| "loss": 0.9911, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 3.552380952380952, | |
| "grad_norm": 0.14395757212272578, | |
| "learning_rate": 1.8783163675210307e-05, | |
| "loss": 0.9837, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 3.5619047619047617, | |
| "grad_norm": 0.16846410866461603, | |
| "learning_rate": 1.8557937318494507e-05, | |
| "loss": 0.9744, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 3.571428571428571, | |
| "grad_norm": 0.1523793105601579, | |
| "learning_rate": 1.8333660867943163e-05, | |
| "loss": 0.971, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 3.580952380952381, | |
| "grad_norm": 0.13785660562080743, | |
| "learning_rate": 1.8110344259241398e-05, | |
| "loss": 0.9766, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 3.5904761904761906, | |
| "grad_norm": 0.1325642554414296, | |
| "learning_rate": 1.7887997385552278e-05, | |
| "loss": 0.9649, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.15775432838130118, | |
| "learning_rate": 1.766663009707861e-05, | |
| "loss": 0.9689, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 3.6095238095238096, | |
| "grad_norm": 0.12446439908125384, | |
| "learning_rate": 1.7446252200626555e-05, | |
| "loss": 0.9722, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 3.619047619047619, | |
| "grad_norm": 0.13869302526796073, | |
| "learning_rate": 1.7226873459171142e-05, | |
| "loss": 0.9651, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 3.6285714285714286, | |
| "grad_norm": 0.12813837269442743, | |
| "learning_rate": 1.700850359142373e-05, | |
| "loss": 0.9659, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 3.638095238095238, | |
| "grad_norm": 0.13933232700434076, | |
| "learning_rate": 1.679115227140155e-05, | |
| "loss": 0.9731, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 3.6476190476190475, | |
| "grad_norm": 0.14485112162942804, | |
| "learning_rate": 1.6574829127999067e-05, | |
| "loss": 0.9854, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 3.657142857142857, | |
| "grad_norm": 0.1390597866481306, | |
| "learning_rate": 1.6359543744561438e-05, | |
| "loss": 0.9706, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 0.14156616905744906, | |
| "learning_rate": 1.614530565845994e-05, | |
| "loss": 0.9741, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 3.6761904761904765, | |
| "grad_norm": 0.14789503098270715, | |
| "learning_rate": 1.5932124360669473e-05, | |
| "loss": 0.9792, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 3.685714285714286, | |
| "grad_norm": 0.14078688051407673, | |
| "learning_rate": 1.5720009295348103e-05, | |
| "loss": 0.9805, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 3.6952380952380954, | |
| "grad_norm": 0.149662490393232, | |
| "learning_rate": 1.5508969859418617e-05, | |
| "loss": 0.9708, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 3.704761904761905, | |
| "grad_norm": 0.15565972406127485, | |
| "learning_rate": 1.529901540215233e-05, | |
| "loss": 0.9907, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 3.7142857142857144, | |
| "grad_norm": 0.14457925539378302, | |
| "learning_rate": 1.5090155224754823e-05, | |
| "loss": 0.972, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.723809523809524, | |
| "grad_norm": 0.15980853988420965, | |
| "learning_rate": 1.4882398579953928e-05, | |
| "loss": 0.9858, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 3.7333333333333334, | |
| "grad_norm": 0.1353513800841828, | |
| "learning_rate": 1.4675754671589801e-05, | |
| "loss": 0.9843, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 3.742857142857143, | |
| "grad_norm": 0.13602936334115348, | |
| "learning_rate": 1.4470232654207208e-05, | |
| "loss": 0.9819, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 3.7523809523809524, | |
| "grad_norm": 0.14448055061579274, | |
| "learning_rate": 1.4265841632649915e-05, | |
| "loss": 0.9722, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 3.761904761904762, | |
| "grad_norm": 0.13590836391844754, | |
| "learning_rate": 1.40625906616574e-05, | |
| "loss": 0.9658, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 3.7714285714285714, | |
| "grad_norm": 0.1435455827754151, | |
| "learning_rate": 1.3860488745463694e-05, | |
| "loss": 0.9794, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 3.780952380952381, | |
| "grad_norm": 0.1397532798524967, | |
| "learning_rate": 1.365954483739846e-05, | |
| "loss": 0.9829, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 3.7904761904761903, | |
| "grad_norm": 0.12978394192415188, | |
| "learning_rate": 1.3459767839490386e-05, | |
| "loss": 0.9721, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.14356015101687936, | |
| "learning_rate": 1.326116660207279e-05, | |
| "loss": 0.9835, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 3.8095238095238093, | |
| "grad_norm": 0.13247764252770453, | |
| "learning_rate": 1.3063749923391557e-05, | |
| "loss": 0.9765, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.819047619047619, | |
| "grad_norm": 0.13968884823844, | |
| "learning_rate": 1.2867526549215356e-05, | |
| "loss": 0.9806, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 3.8285714285714287, | |
| "grad_norm": 0.15020881590521024, | |
| "learning_rate": 1.2672505172448201e-05, | |
| "loss": 0.9743, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 3.8380952380952382, | |
| "grad_norm": 0.12600562838248064, | |
| "learning_rate": 1.2478694432744342e-05, | |
| "loss": 0.9747, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 3.8476190476190477, | |
| "grad_norm": 0.1253894515680319, | |
| "learning_rate": 1.2286102916125534e-05, | |
| "loss": 0.9734, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 3.857142857142857, | |
| "grad_norm": 0.14320490323799898, | |
| "learning_rate": 1.2094739154600616e-05, | |
| "loss": 0.9816, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 3.8666666666666667, | |
| "grad_norm": 0.12399745336033988, | |
| "learning_rate": 1.1904611625787612e-05, | |
| "loss": 0.9884, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 3.876190476190476, | |
| "grad_norm": 0.13417523628822833, | |
| "learning_rate": 1.1715728752538103e-05, | |
| "loss": 0.9783, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 3.8857142857142857, | |
| "grad_norm": 0.15683701346116036, | |
| "learning_rate": 1.1528098902564109e-05, | |
| "loss": 0.9733, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 3.895238095238095, | |
| "grad_norm": 0.13713534522259704, | |
| "learning_rate": 1.1341730388067393e-05, | |
| "loss": 0.9818, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 3.9047619047619047, | |
| "grad_norm": 0.13758248343689114, | |
| "learning_rate": 1.1156631465371213e-05, | |
| "loss": 0.9688, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.914285714285714, | |
| "grad_norm": 0.14115308612974595, | |
| "learning_rate": 1.0972810334554565e-05, | |
| "loss": 0.9797, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 3.923809523809524, | |
| "grad_norm": 0.5870477835443715, | |
| "learning_rate": 1.0790275139088879e-05, | |
| "loss": 0.982, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 3.9333333333333336, | |
| "grad_norm": 0.12922641407686744, | |
| "learning_rate": 1.0609033965477318e-05, | |
| "loss": 0.9805, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 3.942857142857143, | |
| "grad_norm": 0.14422492668411402, | |
| "learning_rate": 1.0429094842896484e-05, | |
| "loss": 0.9795, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 3.9523809523809526, | |
| "grad_norm": 0.14945738902129274, | |
| "learning_rate": 1.0250465742840743e-05, | |
| "loss": 0.9736, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 3.961904761904762, | |
| "grad_norm": 0.12244500468761615, | |
| "learning_rate": 1.007315457876907e-05, | |
| "loss": 0.9811, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 3.9714285714285715, | |
| "grad_norm": 0.13445713568795264, | |
| "learning_rate": 9.897169205754461e-06, | |
| "loss": 0.9846, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 3.980952380952381, | |
| "grad_norm": 0.13743382078481892, | |
| "learning_rate": 9.722517420135977e-06, | |
| "loss": 0.9688, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 3.9904761904761905, | |
| "grad_norm": 0.1233003022413971, | |
| "learning_rate": 9.549206959173331e-06, | |
| "loss": 0.9845, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.1263829549351974, | |
| "learning_rate": 9.377245500704135e-06, | |
| "loss": 0.9928, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.0095238095238095, | |
| "grad_norm": 0.17390084497636504, | |
| "learning_rate": 9.206640662803746e-06, | |
| "loss": 0.9623, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 4.019047619047619, | |
| "grad_norm": 0.1431373191141913, | |
| "learning_rate": 9.037400003447808e-06, | |
| "loss": 0.9616, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 4.0285714285714285, | |
| "grad_norm": 0.13455551888638445, | |
| "learning_rate": 8.869531020177367e-06, | |
| "loss": 0.9475, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 4.038095238095238, | |
| "grad_norm": 0.14362146854953411, | |
| "learning_rate": 8.703041149766797e-06, | |
| "loss": 0.9541, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 4.0476190476190474, | |
| "grad_norm": 0.1452952254226326, | |
| "learning_rate": 8.537937767894303e-06, | |
| "loss": 0.9548, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 4.057142857142857, | |
| "grad_norm": 0.156138958795091, | |
| "learning_rate": 8.374228188815157e-06, | |
| "loss": 0.9597, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 4.066666666666666, | |
| "grad_norm": 0.15807376487720612, | |
| "learning_rate": 8.211919665037697e-06, | |
| "loss": 0.944, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 4.076190476190476, | |
| "grad_norm": 0.1428349305587997, | |
| "learning_rate": 8.051019387002035e-06, | |
| "loss": 0.957, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 4.085714285714285, | |
| "grad_norm": 0.156467940239568, | |
| "learning_rate": 7.891534482761463e-06, | |
| "loss": 0.9519, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 4.095238095238095, | |
| "grad_norm": 0.15091456328015054, | |
| "learning_rate": 7.733472017666739e-06, | |
| "loss": 0.9579, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.104761904761904, | |
| "grad_norm": 0.15455184255548396, | |
| "learning_rate": 7.57683899405305e-06, | |
| "loss": 0.9614, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 4.114285714285714, | |
| "grad_norm": 0.23250611326493095, | |
| "learning_rate": 7.42164235092981e-06, | |
| "loss": 0.9777, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 4.123809523809523, | |
| "grad_norm": 0.15146330742839126, | |
| "learning_rate": 7.26788896367324e-06, | |
| "loss": 0.9519, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 4.133333333333334, | |
| "grad_norm": 0.14062980079515083, | |
| "learning_rate": 7.115585643721798e-06, | |
| "loss": 0.961, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 4.142857142857143, | |
| "grad_norm": 0.13654994504554915, | |
| "learning_rate": 6.964739138274433e-06, | |
| "loss": 0.9638, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 4.152380952380953, | |
| "grad_norm": 0.13405718763219662, | |
| "learning_rate": 6.815356129991624e-06, | |
| "loss": 0.9523, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 4.161904761904762, | |
| "grad_norm": 0.1355347721482518, | |
| "learning_rate": 6.667443236699398e-06, | |
| "loss": 0.9572, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 4.171428571428572, | |
| "grad_norm": 0.3193405192419738, | |
| "learning_rate": 6.521007011096143e-06, | |
| "loss": 0.965, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 4.180952380952381, | |
| "grad_norm": 0.12077417580448366, | |
| "learning_rate": 6.376053940462279e-06, | |
| "loss": 0.959, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 4.190476190476191, | |
| "grad_norm": 0.11975543200056642, | |
| "learning_rate": 6.232590446372864e-06, | |
| "loss": 0.9463, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.12394898025158416, | |
| "learning_rate": 6.090622884413164e-06, | |
| "loss": 0.9599, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 4.20952380952381, | |
| "grad_norm": 0.13246928966220897, | |
| "learning_rate": 5.95015754389705e-06, | |
| "loss": 0.9587, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 4.219047619047619, | |
| "grad_norm": 0.17218887548328696, | |
| "learning_rate": 5.811200647588386e-06, | |
| "loss": 0.9483, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 4.228571428571429, | |
| "grad_norm": 0.12269437836537307, | |
| "learning_rate": 5.673758351425358e-06, | |
| "loss": 0.9652, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 4.238095238095238, | |
| "grad_norm": 0.1150641697364405, | |
| "learning_rate": 5.537836744247753e-06, | |
| "loss": 0.9481, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 4.247619047619048, | |
| "grad_norm": 0.12282362749817176, | |
| "learning_rate": 5.403441847527227e-06, | |
| "loss": 0.9565, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 4.257142857142857, | |
| "grad_norm": 0.12605101659916942, | |
| "learning_rate": 5.270579615100518e-06, | |
| "loss": 0.9476, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 4.266666666666667, | |
| "grad_norm": 0.12368987938067505, | |
| "learning_rate": 5.139255932905731e-06, | |
| "loss": 0.9538, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 4.276190476190476, | |
| "grad_norm": 0.11271067268917899, | |
| "learning_rate": 5.009476618721549e-06, | |
| "loss": 0.9776, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 4.285714285714286, | |
| "grad_norm": 0.1136633818810862, | |
| "learning_rate": 4.881247421909523e-06, | |
| "loss": 0.962, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.295238095238095, | |
| "grad_norm": 0.127022521830231, | |
| "learning_rate": 4.754574023159335e-06, | |
| "loss": 0.9574, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 4.304761904761905, | |
| "grad_norm": 0.11992208253243061, | |
| "learning_rate": 4.629462034237193e-06, | |
| "loss": 0.9606, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 4.314285714285714, | |
| "grad_norm": 0.11191987434816011, | |
| "learning_rate": 4.505916997737143e-06, | |
| "loss": 0.9489, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 4.3238095238095235, | |
| "grad_norm": 0.1183918815452489, | |
| "learning_rate": 4.383944386835617e-06, | |
| "loss": 0.9516, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 4.333333333333333, | |
| "grad_norm": 0.11408502654065866, | |
| "learning_rate": 4.263549605048898e-06, | |
| "loss": 0.9407, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 4.3428571428571425, | |
| "grad_norm": 0.11790658741334677, | |
| "learning_rate": 4.144737985993774e-06, | |
| "loss": 0.9539, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 4.352380952380952, | |
| "grad_norm": 0.12878580244190324, | |
| "learning_rate": 4.027514793151235e-06, | |
| "loss": 0.961, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 4.3619047619047615, | |
| "grad_norm": 0.12558845969705756, | |
| "learning_rate": 3.9118852196333e-06, | |
| "loss": 0.9681, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 4.371428571428572, | |
| "grad_norm": 0.11339758568248902, | |
| "learning_rate": 3.7978543879529704e-06, | |
| "loss": 0.9544, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 4.380952380952381, | |
| "grad_norm": 0.12502688051375377, | |
| "learning_rate": 3.6854273497972705e-06, | |
| "loss": 0.9573, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 4.390476190476191, | |
| "grad_norm": 0.10534529012828957, | |
| "learning_rate": 3.574609085803471e-06, | |
| "loss": 0.9369, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.10245960822897354, | |
| "learning_rate": 3.4654045053384456e-06, | |
| "loss": 0.9501, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 4.40952380952381, | |
| "grad_norm": 0.11767956608151667, | |
| "learning_rate": 3.3578184462811714e-06, | |
| "loss": 0.9564, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 4.419047619047619, | |
| "grad_norm": 0.12275952669713826, | |
| "learning_rate": 3.2518556748083817e-06, | |
| "loss": 0.9536, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 4.428571428571429, | |
| "grad_norm": 0.10820465034590014, | |
| "learning_rate": 3.1475208851834815e-06, | |
| "loss": 0.9736, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 4.438095238095238, | |
| "grad_norm": 0.10999235496173602, | |
| "learning_rate": 3.0448186995485307e-06, | |
| "loss": 0.9661, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 4.447619047619048, | |
| "grad_norm": 0.10346043147687037, | |
| "learning_rate": 2.9437536677194976e-06, | |
| "loss": 0.9567, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 4.457142857142857, | |
| "grad_norm": 0.10719225041373819, | |
| "learning_rate": 2.844330266984705e-06, | |
| "loss": 0.9456, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 4.466666666666667, | |
| "grad_norm": 0.10494961443465013, | |
| "learning_rate": 2.746552901906463e-06, | |
| "loss": 0.9639, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 4.476190476190476, | |
| "grad_norm": 0.10157859382678862, | |
| "learning_rate": 2.650425904125964e-06, | |
| "loss": 0.9562, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 4.485714285714286, | |
| "grad_norm": 0.09988194579990615, | |
| "learning_rate": 2.55595353217136e-06, | |
| "loss": 0.9488, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 4.495238095238095, | |
| "grad_norm": 0.09993285343775345, | |
| "learning_rate": 2.463139971269133e-06, | |
| "loss": 0.9599, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 4.504761904761905, | |
| "grad_norm": 0.09844176642750738, | |
| "learning_rate": 2.371989333158671e-06, | |
| "loss": 0.9615, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 4.514285714285714, | |
| "grad_norm": 0.10190707779708189, | |
| "learning_rate": 2.2825056559101145e-06, | |
| "loss": 0.9563, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 4.523809523809524, | |
| "grad_norm": 0.10131001370817404, | |
| "learning_rate": 2.194692903745459e-06, | |
| "loss": 0.9587, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 4.533333333333333, | |
| "grad_norm": 0.10251872435272494, | |
| "learning_rate": 2.1085549668629567e-06, | |
| "loss": 0.9489, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 4.542857142857143, | |
| "grad_norm": 0.10043227346155645, | |
| "learning_rate": 2.0240956612647487e-06, | |
| "loss": 0.9634, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 4.552380952380952, | |
| "grad_norm": 0.09956161225983029, | |
| "learning_rate": 1.9413187285878355e-06, | |
| "loss": 0.9468, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 4.561904761904762, | |
| "grad_norm": 0.11045616365540861, | |
| "learning_rate": 1.8602278359383063e-06, | |
| "loss": 0.9526, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 4.571428571428571, | |
| "grad_norm": 0.09834202928593194, | |
| "learning_rate": 1.78082657572888e-06, | |
| "loss": 0.9609, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 4.580952380952381, | |
| "grad_norm": 0.1147748598722428, | |
| "learning_rate": 1.7031184655197818e-06, | |
| "loss": 0.9412, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 4.59047619047619, | |
| "grad_norm": 0.10211354358709429, | |
| "learning_rate": 1.6271069478628644e-06, | |
| "loss": 0.9557, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.1146778979125764, | |
| "learning_rate": 1.5527953901491466e-06, | |
| "loss": 0.9468, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 4.609523809523809, | |
| "grad_norm": 0.10809368043798627, | |
| "learning_rate": 1.48018708445961e-06, | |
| "loss": 0.9693, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 4.619047619047619, | |
| "grad_norm": 0.09749254775799188, | |
| "learning_rate": 1.409285247419363e-06, | |
| "loss": 0.9524, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 4.628571428571428, | |
| "grad_norm": 0.09704895874324644, | |
| "learning_rate": 1.3400930200551331e-06, | |
| "loss": 0.9607, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 4.638095238095238, | |
| "grad_norm": 0.09581374811618217, | |
| "learning_rate": 1.2726134676561385e-06, | |
| "loss": 0.9747, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 4.647619047619048, | |
| "grad_norm": 0.09600971666081125, | |
| "learning_rate": 1.2068495796382495e-06, | |
| "loss": 0.9483, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 4.6571428571428575, | |
| "grad_norm": 0.10065951842357326, | |
| "learning_rate": 1.142804269411606e-06, | |
| "loss": 0.9652, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 4.666666666666667, | |
| "grad_norm": 0.1012630059377017, | |
| "learning_rate": 1.0804803742515068e-06, | |
| "loss": 0.9417, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 4.6761904761904765, | |
| "grad_norm": 0.10102648935759229, | |
| "learning_rate": 1.0198806551727557e-06, | |
| "loss": 0.9575, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 4.685714285714286, | |
| "grad_norm": 0.10109454230992841, | |
| "learning_rate": 9.610077968072962e-07, | |
| "loss": 0.966, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 4.695238095238095, | |
| "grad_norm": 0.09805229033614825, | |
| "learning_rate": 9.038644072853331e-07, | |
| "loss": 0.9671, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 4.704761904761905, | |
| "grad_norm": 0.09726857715744779, | |
| "learning_rate": 8.484530181197504e-07, | |
| "loss": 0.9477, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 4.714285714285714, | |
| "grad_norm": 0.1012887727099665, | |
| "learning_rate": 7.947760840939688e-07, | |
| "loss": 0.9572, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 4.723809523809524, | |
| "grad_norm": 0.09689224625015615, | |
| "learning_rate": 7.428359831532117e-07, | |
| "loss": 0.9385, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 4.733333333333333, | |
| "grad_norm": 0.09894505285476389, | |
| "learning_rate": 6.926350162991613e-07, | |
| "loss": 0.9502, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 4.742857142857143, | |
| "grad_norm": 0.10241361126956, | |
| "learning_rate": 6.441754074879925e-07, | |
| "loss": 0.9495, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 4.752380952380952, | |
| "grad_norm": 0.09443159904089143, | |
| "learning_rate": 5.974593035318777e-07, | |
| "loss": 0.9536, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 4.761904761904762, | |
| "grad_norm": 0.10189499647323387, | |
| "learning_rate": 5.524887740038676e-07, | |
| "loss": 0.9472, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.771428571428571, | |
| "grad_norm": 0.10773088780281193, | |
| "learning_rate": 5.092658111462179e-07, | |
| "loss": 0.9473, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 4.780952380952381, | |
| "grad_norm": 0.3240978802797332, | |
| "learning_rate": 4.6779232978211297e-07, | |
| "loss": 0.9784, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 4.79047619047619, | |
| "grad_norm": 0.09242163553460452, | |
| "learning_rate": 4.280701672308585e-07, | |
| "loss": 0.9583, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.09118495736223464, | |
| "learning_rate": 3.901010832264662e-07, | |
| "loss": 0.9473, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 4.809523809523809, | |
| "grad_norm": 0.09466091553337481, | |
| "learning_rate": 3.5388675983971664e-07, | |
| "loss": 0.9523, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 4.819047619047619, | |
| "grad_norm": 0.10089901559145306, | |
| "learning_rate": 3.1942880140360955e-07, | |
| "loss": 0.9591, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 4.828571428571428, | |
| "grad_norm": 0.10487799317926302, | |
| "learning_rate": 2.867287344423364e-07, | |
| "loss": 0.9764, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 4.838095238095238, | |
| "grad_norm": 0.0945247266110506, | |
| "learning_rate": 2.557880076036101e-07, | |
| "loss": 0.9492, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 4.847619047619047, | |
| "grad_norm": 0.09269936781651757, | |
| "learning_rate": 2.2660799159451629e-07, | |
| "loss": 0.9579, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 4.857142857142857, | |
| "grad_norm": 0.09460038465065274, | |
| "learning_rate": 1.991899791207663e-07, | |
| "loss": 0.9638, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 4.866666666666667, | |
| "grad_norm": 0.0982949463276317, | |
| "learning_rate": 1.7353518482946308e-07, | |
| "loss": 0.9535, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 4.876190476190477, | |
| "grad_norm": 0.09485743697787292, | |
| "learning_rate": 1.4964474525525075e-07, | |
| "loss": 0.9599, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 4.885714285714286, | |
| "grad_norm": 0.0963473582791281, | |
| "learning_rate": 1.2751971876999504e-07, | |
| "loss": 0.962, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 4.895238095238096, | |
| "grad_norm": 0.09723177864800757, | |
| "learning_rate": 1.0716108553588289e-07, | |
| "loss": 0.9398, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 4.904761904761905, | |
| "grad_norm": 0.09412117317242105, | |
| "learning_rate": 8.856974746199954e-08, | |
| "loss": 0.9578, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 4.914285714285715, | |
| "grad_norm": 0.08864592036283941, | |
| "learning_rate": 7.174652816437811e-08, | |
| "loss": 0.9599, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 4.923809523809524, | |
| "grad_norm": 0.09083413925650892, | |
| "learning_rate": 5.669217292952223e-08, | |
| "loss": 0.9512, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 4.933333333333334, | |
| "grad_norm": 0.09669535232036294, | |
| "learning_rate": 4.3407348681361314e-08, | |
| "loss": 0.9524, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 4.942857142857143, | |
| "grad_norm": 0.09367376797157739, | |
| "learning_rate": 3.189264395172753e-08, | |
| "loss": 0.9438, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 4.9523809523809526, | |
| "grad_norm": 0.09753187429670476, | |
| "learning_rate": 2.214856885427885e-08, | |
| "loss": 0.9513, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 4.961904761904762, | |
| "grad_norm": 0.09256693337711726, | |
| "learning_rate": 1.4175555061894942e-08, | |
| "loss": 0.9504, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 4.9714285714285715, | |
| "grad_norm": 0.09471054416033763, | |
| "learning_rate": 7.973955787567988e-09, | |
| "loss": 0.9648, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 4.980952380952381, | |
| "grad_norm": 0.09324529832027807, | |
| "learning_rate": 3.544045768730797e-09, | |
| "loss": 0.9516, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 4.9904761904761905, | |
| "grad_norm": 0.09644781364770945, | |
| "learning_rate": 8.860212551020653e-10, | |
| "loss": 0.9408, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.09523670709888162, | |
| "learning_rate": 0.0, | |
| "loss": 0.9441, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 525, | |
| "total_flos": 1.350981278339013e+19, | |
| "train_loss": 1.0422365560985747, | |
| "train_runtime": 52983.8768, | |
| "train_samples_per_second": 5.072, | |
| "train_steps_per_second": 0.01 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 525, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.350981278339013e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |