| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9715340522685321, |
| "eval_steps": 500, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0032, |
| "grad_norm": 13.684800148010254, |
| "learning_rate": 0.0, |
| "loss": 2.3276, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 13.660787582397461, |
| "learning_rate": 4e-05, |
| "loss": 2.2792, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 13.35280704498291, |
| "learning_rate": 8e-05, |
| "loss": 2.4151, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 6.15027379989624, |
| "learning_rate": 0.00012, |
| "loss": 1.7812, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 1.3168226480484009, |
| "learning_rate": 0.00016, |
| "loss": 1.4536, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 0.9872580170631409, |
| "learning_rate": 0.0002, |
| "loss": 1.4171, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 0.7496100664138794, |
| "learning_rate": 0.00019935064935064936, |
| "loss": 1.4168, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 0.7376005053520203, |
| "learning_rate": 0.00019870129870129872, |
| "loss": 1.3659, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 0.5281137824058533, |
| "learning_rate": 0.00019805194805194807, |
| "loss": 1.2566, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 0.5485746264457703, |
| "learning_rate": 0.00019740259740259742, |
| "loss": 1.3761, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 0.5506592392921448, |
| "learning_rate": 0.00019675324675324675, |
| "loss": 1.3327, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 0.49382686614990234, |
| "learning_rate": 0.00019610389610389613, |
| "loss": 1.3727, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 0.36203011870384216, |
| "learning_rate": 0.00019545454545454548, |
| "loss": 1.1515, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 0.3528599739074707, |
| "learning_rate": 0.0001948051948051948, |
| "loss": 1.2636, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.31244418025016785, |
| "learning_rate": 0.00019415584415584416, |
| "loss": 1.1873, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 0.3379523754119873, |
| "learning_rate": 0.00019350649350649354, |
| "loss": 1.2657, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 0.3025083839893341, |
| "learning_rate": 0.00019285714285714286, |
| "loss": 1.2846, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 0.2560190260410309, |
| "learning_rate": 0.00019220779220779222, |
| "loss": 1.1587, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 0.2554129958152771, |
| "learning_rate": 0.00019155844155844157, |
| "loss": 1.2812, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.22662702202796936, |
| "learning_rate": 0.00019090909090909092, |
| "loss": 1.1664, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 0.2515714168548584, |
| "learning_rate": 0.00019025974025974027, |
| "loss": 1.2177, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 0.24396637082099915, |
| "learning_rate": 0.00018961038961038963, |
| "loss": 1.2053, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 0.24488303065299988, |
| "learning_rate": 0.00018896103896103895, |
| "loss": 1.2074, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 0.2168620079755783, |
| "learning_rate": 0.00018831168831168833, |
| "loss": 1.1284, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.24021224677562714, |
| "learning_rate": 0.00018766233766233769, |
| "loss": 1.2169, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 0.20057056844234467, |
| "learning_rate": 0.000187012987012987, |
| "loss": 1.1031, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 0.19900795817375183, |
| "learning_rate": 0.00018636363636363636, |
| "loss": 1.1004, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 0.2019268423318863, |
| "learning_rate": 0.00018571428571428572, |
| "loss": 1.1476, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 0.1996479034423828, |
| "learning_rate": 0.00018506493506493507, |
| "loss": 1.1455, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.25262022018432617, |
| "learning_rate": 0.00018441558441558442, |
| "loss": 1.1025, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 0.225438192486763, |
| "learning_rate": 0.00018376623376623378, |
| "loss": 1.1954, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 0.17834505438804626, |
| "learning_rate": 0.00018311688311688313, |
| "loss": 1.0934, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 0.20071206986904144, |
| "learning_rate": 0.00018246753246753248, |
| "loss": 1.0488, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 0.1920139640569687, |
| "learning_rate": 0.00018181818181818183, |
| "loss": 1.123, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.18714852631092072, |
| "learning_rate": 0.0001811688311688312, |
| "loss": 1.0798, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 0.18315713107585907, |
| "learning_rate": 0.00018051948051948054, |
| "loss": 1.1107, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 0.19156870245933533, |
| "learning_rate": 0.00017987012987012987, |
| "loss": 1.1125, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 0.21527768671512604, |
| "learning_rate": 0.00017922077922077922, |
| "loss": 1.1346, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 0.1871163249015808, |
| "learning_rate": 0.0001785714285714286, |
| "loss": 1.0742, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.17750784754753113, |
| "learning_rate": 0.00017792207792207792, |
| "loss": 1.1323, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 0.177419051527977, |
| "learning_rate": 0.00017727272727272728, |
| "loss": 1.1405, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 0.16714292764663696, |
| "learning_rate": 0.00017662337662337663, |
| "loss": 1.1084, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 0.1610356718301773, |
| "learning_rate": 0.00017597402597402598, |
| "loss": 1.1125, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 0.2548656761646271, |
| "learning_rate": 0.00017532467532467534, |
| "loss": 1.1114, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.1731044203042984, |
| "learning_rate": 0.0001746753246753247, |
| "loss": 1.1197, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 0.1739533394575119, |
| "learning_rate": 0.00017402597402597401, |
| "loss": 1.1777, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1504, |
| "grad_norm": 0.2178352177143097, |
| "learning_rate": 0.0001733766233766234, |
| "loss": 1.1111, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 0.17247150838375092, |
| "learning_rate": 0.00017272727272727275, |
| "loss": 1.1253, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1568, |
| "grad_norm": 0.18075324594974518, |
| "learning_rate": 0.00017207792207792207, |
| "loss": 1.1358, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.15898071229457855, |
| "learning_rate": 0.00017142857142857143, |
| "loss": 1.0606, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1632, |
| "grad_norm": 0.16518613696098328, |
| "learning_rate": 0.0001707792207792208, |
| "loss": 1.0944, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 0.16035063564777374, |
| "learning_rate": 0.00017012987012987013, |
| "loss": 1.0554, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1696, |
| "grad_norm": 0.1686483472585678, |
| "learning_rate": 0.00016948051948051948, |
| "loss": 1.0384, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 0.16575631499290466, |
| "learning_rate": 0.00016883116883116884, |
| "loss": 1.0243, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.16840039193630219, |
| "learning_rate": 0.0001681818181818182, |
| "loss": 1.117, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 0.17616064846515656, |
| "learning_rate": 0.00016753246753246754, |
| "loss": 1.0743, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1824, |
| "grad_norm": 0.168218195438385, |
| "learning_rate": 0.0001668831168831169, |
| "loss": 1.0627, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 0.17026656866073608, |
| "learning_rate": 0.00016623376623376625, |
| "loss": 1.0059, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1888, |
| "grad_norm": 0.16454458236694336, |
| "learning_rate": 0.0001655844155844156, |
| "loss": 0.9943, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.17185136675834656, |
| "learning_rate": 0.00016493506493506495, |
| "loss": 1.1545, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1952, |
| "grad_norm": 0.17822986841201782, |
| "learning_rate": 0.00016428571428571428, |
| "loss": 1.073, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 0.1676608771085739, |
| "learning_rate": 0.00016363636363636366, |
| "loss": 1.0886, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2016, |
| "grad_norm": 0.1727771908044815, |
| "learning_rate": 0.000162987012987013, |
| "loss": 1.0432, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 0.17827573418617249, |
| "learning_rate": 0.00016233766233766234, |
| "loss": 1.083, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.19807517528533936, |
| "learning_rate": 0.0001616883116883117, |
| "loss": 1.1208, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 0.17693684995174408, |
| "learning_rate": 0.00016103896103896104, |
| "loss": 1.089, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2144, |
| "grad_norm": 0.15489234030246735, |
| "learning_rate": 0.0001603896103896104, |
| "loss": 0.9707, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 0.16443990170955658, |
| "learning_rate": 0.00015974025974025975, |
| "loss": 1.0643, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.2208, |
| "grad_norm": 0.2051103413105011, |
| "learning_rate": 0.0001590909090909091, |
| "loss": 1.1246, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.18824075162410736, |
| "learning_rate": 0.00015844155844155845, |
| "loss": 1.0855, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2272, |
| "grad_norm": 0.18659448623657227, |
| "learning_rate": 0.0001577922077922078, |
| "loss": 1.1412, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 0.1854114979505539, |
| "learning_rate": 0.00015714285714285716, |
| "loss": 1.0249, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.2336, |
| "grad_norm": 0.1876193732023239, |
| "learning_rate": 0.00015649350649350649, |
| "loss": 1.1029, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 0.1888684630393982, |
| "learning_rate": 0.00015584415584415587, |
| "loss": 1.0789, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.20240606367588043, |
| "learning_rate": 0.0001551948051948052, |
| "loss": 1.0495, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 0.232120081782341, |
| "learning_rate": 0.00015454545454545454, |
| "loss": 1.0735, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.2464, |
| "grad_norm": 0.16897843778133392, |
| "learning_rate": 0.0001538961038961039, |
| "loss": 1.0164, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 0.18796634674072266, |
| "learning_rate": 0.00015324675324675325, |
| "loss": 1.0676, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2528, |
| "grad_norm": 0.19574032723903656, |
| "learning_rate": 0.0001525974025974026, |
| "loss": 1.0456, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.18007811903953552, |
| "learning_rate": 0.00015194805194805196, |
| "loss": 1.0894, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2592, |
| "grad_norm": 0.18932929635047913, |
| "learning_rate": 0.0001512987012987013, |
| "loss": 1.0729, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 0.20614288747310638, |
| "learning_rate": 0.00015064935064935066, |
| "loss": 1.0854, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.2656, |
| "grad_norm": 0.19291089475154877, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 1.1217, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 0.18916529417037964, |
| "learning_rate": 0.00014935064935064934, |
| "loss": 1.0963, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.20306220650672913, |
| "learning_rate": 0.00014870129870129872, |
| "loss": 1.0898, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 0.17870067059993744, |
| "learning_rate": 0.00014805194805194807, |
| "loss": 1.0213, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.2784, |
| "grad_norm": 0.18411923944950104, |
| "learning_rate": 0.0001474025974025974, |
| "loss": 1.0844, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 0.18788227438926697, |
| "learning_rate": 0.00014675324675324675, |
| "loss": 1.0338, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.2848, |
| "grad_norm": 0.23874884843826294, |
| "learning_rate": 0.00014610389610389613, |
| "loss": 1.1118, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.19380499422550201, |
| "learning_rate": 0.00014545454545454546, |
| "loss": 1.0464, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2912, |
| "grad_norm": 0.18968750536441803, |
| "learning_rate": 0.0001448051948051948, |
| "loss": 1.0569, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 0.19545753300189972, |
| "learning_rate": 0.00014415584415584416, |
| "loss": 1.1225, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2976, |
| "grad_norm": 0.19170494377613068, |
| "learning_rate": 0.00014350649350649352, |
| "loss": 1.0602, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 0.17953918874263763, |
| "learning_rate": 0.00014285714285714287, |
| "loss": 1.032, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.1822536289691925, |
| "learning_rate": 0.00014220779220779222, |
| "loss": 1.0559, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 0.18591298162937164, |
| "learning_rate": 0.00014155844155844155, |
| "loss": 1.031, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.3104, |
| "grad_norm": 0.2129002958536148, |
| "learning_rate": 0.00014090909090909093, |
| "loss": 1.1391, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 0.18386681377887726, |
| "learning_rate": 0.00014025974025974028, |
| "loss": 0.9919, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3168, |
| "grad_norm": 0.18314239382743835, |
| "learning_rate": 0.0001396103896103896, |
| "loss": 1.0445, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.1999066174030304, |
| "learning_rate": 0.00013896103896103896, |
| "loss": 1.0538, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3232, |
| "grad_norm": 0.18741188943386078, |
| "learning_rate": 0.00013831168831168834, |
| "loss": 1.0722, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 0.19351010024547577, |
| "learning_rate": 0.00013766233766233766, |
| "loss": 1.0491, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.3296, |
| "grad_norm": 0.18859203159809113, |
| "learning_rate": 0.00013701298701298702, |
| "loss": 1.0593, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 0.1962767392396927, |
| "learning_rate": 0.00013636363636363637, |
| "loss": 1.1344, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.20819440484046936, |
| "learning_rate": 0.00013571428571428572, |
| "loss": 1.1137, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 0.19590184092521667, |
| "learning_rate": 0.00013506493506493507, |
| "loss": 1.0624, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3424, |
| "grad_norm": 0.18631424009799957, |
| "learning_rate": 0.00013441558441558443, |
| "loss": 1.0587, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 0.19572143256664276, |
| "learning_rate": 0.00013376623376623375, |
| "loss": 1.0494, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3488, |
| "grad_norm": 0.1910988837480545, |
| "learning_rate": 0.00013311688311688313, |
| "loss": 1.0481, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.19455869495868683, |
| "learning_rate": 0.00013246753246753249, |
| "loss": 1.029, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3552, |
| "grad_norm": 0.18669827282428741, |
| "learning_rate": 0.0001318181818181818, |
| "loss": 1.0513, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 0.17523664236068726, |
| "learning_rate": 0.0001311688311688312, |
| "loss": 1.0126, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3616, |
| "grad_norm": 0.17929129302501678, |
| "learning_rate": 0.00013051948051948052, |
| "loss": 1.0717, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 0.19380168616771698, |
| "learning_rate": 0.00012987012987012987, |
| "loss": 1.0324, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.18090228736400604, |
| "learning_rate": 0.00012922077922077922, |
| "loss": 1.0515, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 0.2067340910434723, |
| "learning_rate": 0.00012857142857142858, |
| "loss": 1.0939, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3744, |
| "grad_norm": 0.1880485862493515, |
| "learning_rate": 0.00012792207792207793, |
| "loss": 1.0986, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 0.182168647646904, |
| "learning_rate": 0.00012727272727272728, |
| "loss": 1.0109, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3808, |
| "grad_norm": 0.20187129080295563, |
| "learning_rate": 0.00012662337662337663, |
| "loss": 1.0668, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.2082669734954834, |
| "learning_rate": 0.000125974025974026, |
| "loss": 1.054, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.3872, |
| "grad_norm": 0.18294434249401093, |
| "learning_rate": 0.00012532467532467534, |
| "loss": 1.0397, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 0.20515067875385284, |
| "learning_rate": 0.00012467532467532467, |
| "loss": 1.1092, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3936, |
| "grad_norm": 0.1758790761232376, |
| "learning_rate": 0.00012402597402597402, |
| "loss": 0.9755, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 0.2170792669057846, |
| "learning_rate": 0.0001233766233766234, |
| "loss": 1.0434, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.202157124876976, |
| "learning_rate": 0.00012272727272727272, |
| "loss": 1.1129, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 0.18556398153305054, |
| "learning_rate": 0.00012207792207792208, |
| "loss": 1.0665, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.4064, |
| "grad_norm": 0.20196087658405304, |
| "learning_rate": 0.00012142857142857143, |
| "loss": 1.1, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 0.1921566128730774, |
| "learning_rate": 0.0001207792207792208, |
| "loss": 1.0918, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4128, |
| "grad_norm": 0.18866224586963654, |
| "learning_rate": 0.00012012987012987014, |
| "loss": 1.0014, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.207601398229599, |
| "learning_rate": 0.00011948051948051949, |
| "loss": 1.0726, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4192, |
| "grad_norm": 0.21592366695404053, |
| "learning_rate": 0.00011883116883116883, |
| "loss": 1.1379, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 0.2016124576330185, |
| "learning_rate": 0.0001181818181818182, |
| "loss": 1.1428, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.4256, |
| "grad_norm": 0.20478437840938568, |
| "learning_rate": 0.00011753246753246753, |
| "loss": 1.121, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 0.22730594873428345, |
| "learning_rate": 0.00011688311688311689, |
| "loss": 1.0319, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.22592711448669434, |
| "learning_rate": 0.00011623376623376625, |
| "loss": 1.1264, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 0.20035041868686676, |
| "learning_rate": 0.00011558441558441559, |
| "loss": 1.0686, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.4384, |
| "grad_norm": 0.20648567378520966, |
| "learning_rate": 0.00011493506493506494, |
| "loss": 1.0817, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 0.21222743391990662, |
| "learning_rate": 0.00011428571428571428, |
| "loss": 1.0678, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4448, |
| "grad_norm": 0.2075391560792923, |
| "learning_rate": 0.00011363636363636365, |
| "loss": 1.0897, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.1964101791381836, |
| "learning_rate": 0.000112987012987013, |
| "loss": 1.0906, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4512, |
| "grad_norm": 0.22406511008739471, |
| "learning_rate": 0.00011233766233766234, |
| "loss": 1.0594, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 0.23787978291511536, |
| "learning_rate": 0.00011168831168831168, |
| "loss": 1.1053, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4576, |
| "grad_norm": 0.21196185052394867, |
| "learning_rate": 0.00011103896103896105, |
| "loss": 1.0923, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 0.21042804419994354, |
| "learning_rate": 0.0001103896103896104, |
| "loss": 1.0381, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.2267436534166336, |
| "learning_rate": 0.00010974025974025974, |
| "loss": 1.0818, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 0.23742735385894775, |
| "learning_rate": 0.00010909090909090909, |
| "loss": 1.0872, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.4704, |
| "grad_norm": 0.17787213623523712, |
| "learning_rate": 0.00010844155844155846, |
| "loss": 1.03, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 0.22422832250595093, |
| "learning_rate": 0.0001077922077922078, |
| "loss": 1.0738, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4768, |
| "grad_norm": 0.22946301102638245, |
| "learning_rate": 0.00010714285714285715, |
| "loss": 1.0274, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.2137996405363083, |
| "learning_rate": 0.00010649350649350649, |
| "loss": 1.0539, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4832, |
| "grad_norm": 0.1748756766319275, |
| "learning_rate": 0.00010584415584415586, |
| "loss": 1.0355, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 0.22275175154209137, |
| "learning_rate": 0.0001051948051948052, |
| "loss": 1.1696, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4896, |
| "grad_norm": 0.20996077358722687, |
| "learning_rate": 0.00010454545454545455, |
| "loss": 1.0303, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 0.1945938766002655, |
| "learning_rate": 0.00010389610389610389, |
| "loss": 0.9747, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.1970377266407013, |
| "learning_rate": 0.00010324675324675325, |
| "loss": 1.0358, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 0.18814732134342194, |
| "learning_rate": 0.00010259740259740261, |
| "loss": 0.9612, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.5024, |
| "grad_norm": 0.2153233289718628, |
| "learning_rate": 0.00010194805194805195, |
| "loss": 1.0749, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 0.21788008511066437, |
| "learning_rate": 0.0001012987012987013, |
| "loss": 1.0883, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5088, |
| "grad_norm": 0.214650496840477, |
| "learning_rate": 0.00010064935064935067, |
| "loss": 1.0539, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.19312834739685059, |
| "learning_rate": 0.0001, |
| "loss": 1.0657, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5152, |
| "grad_norm": 0.19916598498821259, |
| "learning_rate": 9.935064935064936e-05, |
| "loss": 1.0478, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 0.2057606726884842, |
| "learning_rate": 9.870129870129871e-05, |
| "loss": 1.0094, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5216, |
| "grad_norm": 0.22159607708454132, |
| "learning_rate": 9.805194805194806e-05, |
| "loss": 1.0952, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 0.18274275958538055, |
| "learning_rate": 9.74025974025974e-05, |
| "loss": 1.0065, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.19835162162780762, |
| "learning_rate": 9.675324675324677e-05, |
| "loss": 1.0742, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 0.2114904820919037, |
| "learning_rate": 9.610389610389611e-05, |
| "loss": 1.1109, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5344, |
| "grad_norm": 0.21488523483276367, |
| "learning_rate": 9.545454545454546e-05, |
| "loss": 1.0465, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 0.19870303571224213, |
| "learning_rate": 9.480519480519481e-05, |
| "loss": 1.0318, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5408, |
| "grad_norm": 0.20413029193878174, |
| "learning_rate": 9.415584415584417e-05, |
| "loss": 1.0817, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.1847231239080429, |
| "learning_rate": 9.35064935064935e-05, |
| "loss": 1.0144, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5472, |
| "grad_norm": 0.2715964913368225, |
| "learning_rate": 9.285714285714286e-05, |
| "loss": 0.9832, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 0.2225002497434616, |
| "learning_rate": 9.220779220779221e-05, |
| "loss": 1.1051, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5536, |
| "grad_norm": 0.22931510210037231, |
| "learning_rate": 9.155844155844156e-05, |
| "loss": 1.1042, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 0.21848627924919128, |
| "learning_rate": 9.090909090909092e-05, |
| "loss": 1.1151, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.19852259755134583, |
| "learning_rate": 9.025974025974027e-05, |
| "loss": 1.0889, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 0.2080363780260086, |
| "learning_rate": 8.961038961038961e-05, |
| "loss": 1.0777, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5664, |
| "grad_norm": 0.22391024231910706, |
| "learning_rate": 8.896103896103896e-05, |
| "loss": 1.1092, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 0.21793846786022186, |
| "learning_rate": 8.831168831168831e-05, |
| "loss": 1.044, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5728, |
| "grad_norm": 0.2009749859571457, |
| "learning_rate": 8.766233766233767e-05, |
| "loss": 1.0198, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.19432318210601807, |
| "learning_rate": 8.701298701298701e-05, |
| "loss": 1.075, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5792, |
| "grad_norm": 0.18634547293186188, |
| "learning_rate": 8.636363636363637e-05, |
| "loss": 0.9964, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 0.1947103589773178, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 1.0025, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5856, |
| "grad_norm": 0.23098671436309814, |
| "learning_rate": 8.506493506493507e-05, |
| "loss": 1.0562, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 0.19686414301395416, |
| "learning_rate": 8.441558441558442e-05, |
| "loss": 1.0285, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.19852428138256073, |
| "learning_rate": 8.376623376623377e-05, |
| "loss": 1.0054, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 0.21483510732650757, |
| "learning_rate": 8.311688311688312e-05, |
| "loss": 1.108, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5984, |
| "grad_norm": 0.23313644528388977, |
| "learning_rate": 8.246753246753248e-05, |
| "loss": 1.1383, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 0.21453145146369934, |
| "learning_rate": 8.181818181818183e-05, |
| "loss": 1.0911, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.6048, |
| "grad_norm": 0.20268195867538452, |
| "learning_rate": 8.116883116883117e-05, |
| "loss": 1.0145, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.20576398074626923, |
| "learning_rate": 8.051948051948052e-05, |
| "loss": 1.0829, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6112, |
| "grad_norm": 0.21732626855373383, |
| "learning_rate": 7.987012987012987e-05, |
| "loss": 1.0152, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 0.22046895325183868, |
| "learning_rate": 7.922077922077923e-05, |
| "loss": 1.1311, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.6176, |
| "grad_norm": 0.19727715849876404, |
| "learning_rate": 7.857142857142858e-05, |
| "loss": 1.0364, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 0.20861488580703735, |
| "learning_rate": 7.792207792207793e-05, |
| "loss": 1.0435, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.18545083701610565, |
| "learning_rate": 7.727272727272727e-05, |
| "loss": 1.0299, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 0.19965052604675293, |
| "learning_rate": 7.662337662337662e-05, |
| "loss": 1.0511, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.6304, |
| "grad_norm": 0.23673909902572632, |
| "learning_rate": 7.597402597402598e-05, |
| "loss": 1.081, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 0.17583179473876953, |
| "learning_rate": 7.532467532467533e-05, |
| "loss": 0.9808, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6368, |
| "grad_norm": 0.2129366099834442, |
| "learning_rate": 7.467532467532467e-05, |
| "loss": 1.0522, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.21679140627384186, |
| "learning_rate": 7.402597402597404e-05, |
| "loss": 1.0567, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6432, |
| "grad_norm": 0.2032000720500946, |
| "learning_rate": 7.337662337662338e-05, |
| "loss": 1.0466, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 0.1887970268726349, |
| "learning_rate": 7.272727272727273e-05, |
| "loss": 1.0329, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6496, |
| "grad_norm": 0.21060192584991455, |
| "learning_rate": 7.207792207792208e-05, |
| "loss": 1.1021, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 0.21191425621509552, |
| "learning_rate": 7.142857142857143e-05, |
| "loss": 0.99, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.1995989829301834, |
| "learning_rate": 7.077922077922077e-05, |
| "loss": 1.0526, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 0.1849513053894043, |
| "learning_rate": 7.012987012987014e-05, |
| "loss": 0.9998, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6624, |
| "grad_norm": 0.1948779672384262, |
| "learning_rate": 6.948051948051948e-05, |
| "loss": 1.075, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 0.20374052226543427, |
| "learning_rate": 6.883116883116883e-05, |
| "loss": 1.0933, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6688, |
| "grad_norm": 0.2102465033531189, |
| "learning_rate": 6.818181818181818e-05, |
| "loss": 1.1123, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.21376173198223114, |
| "learning_rate": 6.753246753246754e-05, |
| "loss": 1.1233, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6752, |
| "grad_norm": 0.20934203267097473, |
| "learning_rate": 6.688311688311688e-05, |
| "loss": 1.1374, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 0.18604128062725067, |
| "learning_rate": 6.623376623376624e-05, |
| "loss": 1.0213, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6816, |
| "grad_norm": 0.19644233584403992, |
| "learning_rate": 6.55844155844156e-05, |
| "loss": 1.0046, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 0.18479463458061218, |
| "learning_rate": 6.493506493506494e-05, |
| "loss": 0.9792, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.1945149153470993, |
| "learning_rate": 6.428571428571429e-05, |
| "loss": 1.0584, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 0.2070147544145584, |
| "learning_rate": 6.363636363636364e-05, |
| "loss": 1.071, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6944, |
| "grad_norm": 0.19645985960960388, |
| "learning_rate": 6.2987012987013e-05, |
| "loss": 1.0721, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 0.1960117667913437, |
| "learning_rate": 6.233766233766233e-05, |
| "loss": 1.071, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.7008, |
| "grad_norm": 0.20168261229991913, |
| "learning_rate": 6.16883116883117e-05, |
| "loss": 1.0808, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.21254412829875946, |
| "learning_rate": 6.103896103896104e-05, |
| "loss": 1.0287, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7072, |
| "grad_norm": 0.21271063387393951, |
| "learning_rate": 6.03896103896104e-05, |
| "loss": 1.0605, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 0.2081408053636551, |
| "learning_rate": 5.9740259740259744e-05, |
| "loss": 1.091, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.7136, |
| "grad_norm": 0.21113798022270203, |
| "learning_rate": 5.90909090909091e-05, |
| "loss": 1.1323, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 0.20670844614505768, |
| "learning_rate": 5.844155844155844e-05, |
| "loss": 1.0955, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.2010120451450348, |
| "learning_rate": 5.7792207792207796e-05, |
| "loss": 1.1068, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 0.20379121601581573, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 1.0419, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.7264, |
| "grad_norm": 0.22799807786941528, |
| "learning_rate": 5.64935064935065e-05, |
| "loss": 1.0904, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 0.2005995213985443, |
| "learning_rate": 5.584415584415584e-05, |
| "loss": 1.078, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7328, |
| "grad_norm": 0.20329605042934418, |
| "learning_rate": 5.51948051948052e-05, |
| "loss": 1.0245, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.19283504784107208, |
| "learning_rate": 5.4545454545454546e-05, |
| "loss": 1.0367, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7392, |
| "grad_norm": 0.20624355971813202, |
| "learning_rate": 5.38961038961039e-05, |
| "loss": 1.1046, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 0.21362991631031036, |
| "learning_rate": 5.3246753246753245e-05, |
| "loss": 1.1104, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.7456, |
| "grad_norm": 0.20447863638401031, |
| "learning_rate": 5.25974025974026e-05, |
| "loss": 1.0514, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 0.1974381059408188, |
| "learning_rate": 5.1948051948051944e-05, |
| "loss": 1.0048, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.21237170696258545, |
| "learning_rate": 5.1298701298701304e-05, |
| "loss": 1.1299, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 0.21224971115589142, |
| "learning_rate": 5.064935064935065e-05, |
| "loss": 1.05, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7584, |
| "grad_norm": 0.19865018129348755, |
| "learning_rate": 5e-05, |
| "loss": 1.0665, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 0.19199275970458984, |
| "learning_rate": 4.9350649350649355e-05, |
| "loss": 0.9531, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7648, |
| "grad_norm": 0.19573214650154114, |
| "learning_rate": 4.87012987012987e-05, |
| "loss": 1.0318, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.21338805556297302, |
| "learning_rate": 4.8051948051948054e-05, |
| "loss": 1.0343, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7712, |
| "grad_norm": 0.2254691869020462, |
| "learning_rate": 4.740259740259741e-05, |
| "loss": 1.0472, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 0.18101665377616882, |
| "learning_rate": 4.675324675324675e-05, |
| "loss": 1.017, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7776, |
| "grad_norm": 0.22090592980384827, |
| "learning_rate": 4.6103896103896106e-05, |
| "loss": 1.0389, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 0.20865507423877716, |
| "learning_rate": 4.545454545454546e-05, |
| "loss": 1.0369, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.21619610488414764, |
| "learning_rate": 4.4805194805194805e-05, |
| "loss": 1.109, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 0.21694771945476532, |
| "learning_rate": 4.415584415584416e-05, |
| "loss": 1.0525, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7904, |
| "grad_norm": 0.2182662934064865, |
| "learning_rate": 4.3506493506493503e-05, |
| "loss": 1.0331, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 0.2026486098766327, |
| "learning_rate": 4.2857142857142856e-05, |
| "loss": 1.027, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7968, |
| "grad_norm": 0.19606547057628632, |
| "learning_rate": 4.220779220779221e-05, |
| "loss": 1.0242, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.22107470035552979, |
| "learning_rate": 4.155844155844156e-05, |
| "loss": 1.0924, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8032, |
| "grad_norm": 0.19960008561611176, |
| "learning_rate": 4.0909090909090915e-05, |
| "loss": 1.0384, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 0.1945488154888153, |
| "learning_rate": 4.025974025974026e-05, |
| "loss": 1.0673, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.8096, |
| "grad_norm": 0.22067414224147797, |
| "learning_rate": 3.9610389610389614e-05, |
| "loss": 1.0426, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 0.19010980427265167, |
| "learning_rate": 3.8961038961038966e-05, |
| "loss": 1.0617, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.18781176209449768, |
| "learning_rate": 3.831168831168831e-05, |
| "loss": 1.0243, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 0.20388829708099365, |
| "learning_rate": 3.7662337662337665e-05, |
| "loss": 1.0476, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.8224, |
| "grad_norm": 0.19911155104637146, |
| "learning_rate": 3.701298701298702e-05, |
| "loss": 1.0324, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 0.19884039461612701, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 1.0242, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8288, |
| "grad_norm": 0.19036105275154114, |
| "learning_rate": 3.571428571428572e-05, |
| "loss": 1.0323, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.20039844512939453, |
| "learning_rate": 3.506493506493507e-05, |
| "loss": 1.0749, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8352, |
| "grad_norm": 0.1899934560060501, |
| "learning_rate": 3.4415584415584416e-05, |
| "loss": 1.0115, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 0.20019090175628662, |
| "learning_rate": 3.376623376623377e-05, |
| "loss": 1.0782, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8416, |
| "grad_norm": 0.2020583152770996, |
| "learning_rate": 3.311688311688312e-05, |
| "loss": 1.0687, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 0.21407337486743927, |
| "learning_rate": 3.246753246753247e-05, |
| "loss": 1.1015, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.1871640682220459, |
| "learning_rate": 3.181818181818182e-05, |
| "loss": 0.9637, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 0.21622811257839203, |
| "learning_rate": 3.1168831168831166e-05, |
| "loss": 1.1222, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8544, |
| "grad_norm": 0.22504661977291107, |
| "learning_rate": 3.051948051948052e-05, |
| "loss": 1.132, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 0.19177629053592682, |
| "learning_rate": 2.9870129870129872e-05, |
| "loss": 1.0281, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8608, |
| "grad_norm": 0.1970544159412384, |
| "learning_rate": 2.922077922077922e-05, |
| "loss": 1.0393, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.21554522216320038, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 1.074, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8672, |
| "grad_norm": 0.21131229400634766, |
| "learning_rate": 2.792207792207792e-05, |
| "loss": 1.054, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 0.19816523790359497, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": 1.0456, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8736, |
| "grad_norm": 0.21075209975242615, |
| "learning_rate": 2.6623376623376623e-05, |
| "loss": 1.0758, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 0.2296527624130249, |
| "learning_rate": 2.5974025974025972e-05, |
| "loss": 1.0917, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.19722610712051392, |
| "learning_rate": 2.5324675324675325e-05, |
| "loss": 1.0704, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 0.18721099197864532, |
| "learning_rate": 2.4675324675324678e-05, |
| "loss": 0.9919, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8864, |
| "grad_norm": 0.20244193077087402, |
| "learning_rate": 2.4025974025974027e-05, |
| "loss": 1.0368, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 0.19518914818763733, |
| "learning_rate": 2.3376623376623376e-05, |
| "loss": 1.0436, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8928, |
| "grad_norm": 0.19650357961654663, |
| "learning_rate": 2.272727272727273e-05, |
| "loss": 1.0306, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.20320096611976624, |
| "learning_rate": 2.207792207792208e-05, |
| "loss": 1.0941, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8992, |
| "grad_norm": 0.18296951055526733, |
| "learning_rate": 2.1428571428571428e-05, |
| "loss": 0.9802, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.9024, |
| "grad_norm": 0.21357610821723938, |
| "learning_rate": 2.077922077922078e-05, |
| "loss": 1.0449, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.9056, |
| "grad_norm": 0.193921759724617, |
| "learning_rate": 2.012987012987013e-05, |
| "loss": 1.0116, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 0.1953902244567871, |
| "learning_rate": 1.9480519480519483e-05, |
| "loss": 1.0105, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.19440975785255432, |
| "learning_rate": 1.8831168831168833e-05, |
| "loss": 0.9952, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.9152, |
| "grad_norm": 0.21054105460643768, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 1.0701, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.9184, |
| "grad_norm": 0.18844804167747498, |
| "learning_rate": 1.7532467532467535e-05, |
| "loss": 1.0146, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 0.2067311704158783, |
| "learning_rate": 1.6883116883116884e-05, |
| "loss": 1.0781, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.9248, |
| "grad_norm": 0.1941213756799698, |
| "learning_rate": 1.6233766233766234e-05, |
| "loss": 0.9814, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.22726193070411682, |
| "learning_rate": 1.5584415584415583e-05, |
| "loss": 1.1431, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9312, |
| "grad_norm": 0.18025581538677216, |
| "learning_rate": 1.4935064935064936e-05, |
| "loss": 0.9649, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 0.21535000205039978, |
| "learning_rate": 1.4285714285714285e-05, |
| "loss": 1.0441, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.9376, |
| "grad_norm": 0.20014546811580658, |
| "learning_rate": 1.3636363636363637e-05, |
| "loss": 1.0166, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.9408, |
| "grad_norm": 0.22738787531852722, |
| "learning_rate": 1.2987012987012986e-05, |
| "loss": 1.0564, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.2020861804485321, |
| "learning_rate": 1.2337662337662339e-05, |
| "loss": 1.1241, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 0.19888809323310852, |
| "learning_rate": 1.1688311688311688e-05, |
| "loss": 1.1114, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.9504, |
| "grad_norm": 0.20912377536296844, |
| "learning_rate": 1.103896103896104e-05, |
| "loss": 1.0971, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9536, |
| "grad_norm": 0.21206621825695038, |
| "learning_rate": 1.038961038961039e-05, |
| "loss": 1.0601, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.9568, |
| "grad_norm": 0.18667680025100708, |
| "learning_rate": 9.740259740259742e-06, |
| "loss": 1.0291, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.21125559508800507, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 1.0483, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9632, |
| "grad_norm": 0.21776145696640015, |
| "learning_rate": 8.441558441558442e-06, |
| "loss": 0.9912, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.9664, |
| "grad_norm": 0.20144303143024445, |
| "learning_rate": 7.792207792207792e-06, |
| "loss": 1.0357, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.9696, |
| "grad_norm": 0.1984029859304428, |
| "learning_rate": 7.142857142857143e-06, |
| "loss": 1.0648, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 0.17972829937934875, |
| "learning_rate": 6.493506493506493e-06, |
| "loss": 1.0033, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.1818286031484604, |
| "learning_rate": 5.844155844155844e-06, |
| "loss": 0.997, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9792, |
| "grad_norm": 0.19670912623405457, |
| "learning_rate": 5.194805194805195e-06, |
| "loss": 1.0256, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9824, |
| "grad_norm": 0.20527283847332, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 1.0348, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 0.19025909900665283, |
| "learning_rate": 3.896103896103896e-06, |
| "loss": 1.0682, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.9888, |
| "grad_norm": 0.19544818997383118, |
| "learning_rate": 3.2467532467532465e-06, |
| "loss": 0.9872, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.22112183272838593, |
| "learning_rate": 2.5974025974025976e-06, |
| "loss": 1.0661, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9952, |
| "grad_norm": 0.23328153789043427, |
| "learning_rate": 1.948051948051948e-06, |
| "loss": 1.0691, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 0.20181375741958618, |
| "learning_rate": 1.2987012987012988e-06, |
| "loss": 0.9416, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.29312625527381897, |
| "learning_rate": 6.493506493506494e-07, |
| "loss": 1.1216, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.12202467696492762, |
| "grad_norm": 0.2231415957212448, |
| "learning_rate": 0.0, |
| "loss": 1.0468, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.12241329058583503, |
| "grad_norm": 0.22263288497924805, |
| "learning_rate": 0.00017594394706111328, |
| "loss": 1.0399, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.12280190420674245, |
| "grad_norm": 0.22909891605377197, |
| "learning_rate": 0.00017586609575710393, |
| "loss": 1.1069, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.12319051782764986, |
| "grad_norm": 0.23951445519924164, |
| "learning_rate": 0.0001757882444530946, |
| "loss": 1.1036, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.12357913144855727, |
| "grad_norm": 0.2409268021583557, |
| "learning_rate": 0.00017571039314908526, |
| "loss": 1.1114, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.12396774506946469, |
| "grad_norm": 0.23753899335861206, |
| "learning_rate": 0.00017563254184507592, |
| "loss": 1.1297, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.12435635869037209, |
| "grad_norm": 0.2823902666568756, |
| "learning_rate": 0.00017555469054106657, |
| "loss": 1.1293, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12474497231127951, |
| "grad_norm": 0.24093545973300934, |
| "learning_rate": 0.00017547683923705722, |
| "loss": 1.0678, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.12513358593218693, |
| "grad_norm": 0.22565563023090363, |
| "learning_rate": 0.0001753989879330479, |
| "loss": 1.1408, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.12552219955309435, |
| "grad_norm": 0.22569572925567627, |
| "learning_rate": 0.00017532113662903855, |
| "loss": 1.0543, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.12591081317400174, |
| "grad_norm": 0.24962866306304932, |
| "learning_rate": 0.0001752432853250292, |
| "loss": 1.0818, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.12629942679490916, |
| "grad_norm": 0.22184576094150543, |
| "learning_rate": 0.00017516543402101986, |
| "loss": 1.0835, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.12668804041581658, |
| "grad_norm": 0.2572194039821625, |
| "learning_rate": 0.0001750875827170105, |
| "loss": 1.0767, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.127076654036724, |
| "grad_norm": 0.24131342768669128, |
| "learning_rate": 0.00017500973141300116, |
| "loss": 1.0981, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1274652676576314, |
| "grad_norm": 0.2386389970779419, |
| "learning_rate": 0.00017493188010899184, |
| "loss": 1.0828, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.1278538812785388, |
| "grad_norm": 0.2654125690460205, |
| "learning_rate": 0.0001748540288049825, |
| "loss": 1.1266, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.12824249489944622, |
| "grad_norm": 0.2925739884376526, |
| "learning_rate": 0.00017477617750097314, |
| "loss": 1.0983, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.12863110852035364, |
| "grad_norm": 0.26589342951774597, |
| "learning_rate": 0.0001746983261969638, |
| "loss": 1.1029, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.12901972214126106, |
| "grad_norm": 0.24565957486629486, |
| "learning_rate": 0.00017462047489295445, |
| "loss": 1.0975, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.12940833576216845, |
| "grad_norm": 0.2459682673215866, |
| "learning_rate": 0.00017454262358894513, |
| "loss": 1.0566, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.12979694938307587, |
| "grad_norm": 0.23349183797836304, |
| "learning_rate": 0.00017446477228493578, |
| "loss": 1.0833, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.1301855630039833, |
| "grad_norm": 0.26166337728500366, |
| "learning_rate": 0.00017438692098092643, |
| "loss": 1.1598, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.1305741766248907, |
| "grad_norm": 0.24188168346881866, |
| "learning_rate": 0.00017430906967691708, |
| "loss": 1.0728, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.13096279024579813, |
| "grad_norm": 0.22922398149967194, |
| "learning_rate": 0.00017423121837290773, |
| "loss": 1.0311, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.13135140386670552, |
| "grad_norm": 0.2652754485607147, |
| "learning_rate": 0.00017415336706889841, |
| "loss": 1.1096, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.13174001748761294, |
| "grad_norm": 0.2355881780385971, |
| "learning_rate": 0.00017407551576488907, |
| "loss": 1.0964, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.13212863110852036, |
| "grad_norm": 0.244523823261261, |
| "learning_rate": 0.00017399766446087972, |
| "loss": 1.142, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13251724472942777, |
| "grad_norm": 0.24705976247787476, |
| "learning_rate": 0.00017391981315687037, |
| "loss": 1.0943, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.13290585835033517, |
| "grad_norm": 0.22817552089691162, |
| "learning_rate": 0.00017384196185286102, |
| "loss": 1.0621, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.13329447197124258, |
| "grad_norm": 0.22605225443840027, |
| "learning_rate": 0.0001737641105488517, |
| "loss": 1.0714, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.13368308559215, |
| "grad_norm": 0.2584545314311981, |
| "learning_rate": 0.00017368625924484235, |
| "loss": 1.1367, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.13407169921305742, |
| "grad_norm": 0.2248220443725586, |
| "learning_rate": 0.000173608407940833, |
| "loss": 1.0872, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.13446031283396484, |
| "grad_norm": 0.2141868770122528, |
| "learning_rate": 0.00017353055663682368, |
| "loss": 1.0572, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.13484892645487223, |
| "grad_norm": 0.2615523934364319, |
| "learning_rate": 0.00017345270533281434, |
| "loss": 1.1048, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.13523754007577965, |
| "grad_norm": 0.22990448772907257, |
| "learning_rate": 0.000173374854028805, |
| "loss": 1.0528, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.13562615369668707, |
| "grad_norm": 0.2132262885570526, |
| "learning_rate": 0.00017329700272479564, |
| "loss": 1.0476, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1360147673175945, |
| "grad_norm": 0.2578272819519043, |
| "learning_rate": 0.00017321915142078632, |
| "loss": 1.0852, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1364033809385019, |
| "grad_norm": 0.22881457209587097, |
| "learning_rate": 0.00017314130011677697, |
| "loss": 1.1017, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.1367919945594093, |
| "grad_norm": 0.21067696809768677, |
| "learning_rate": 0.00017306344881276762, |
| "loss": 1.0444, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.13718060818031672, |
| "grad_norm": 0.2304215282201767, |
| "learning_rate": 0.0001729855975087583, |
| "loss": 1.0737, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.13756922180122413, |
| "grad_norm": 0.2031925916671753, |
| "learning_rate": 0.00017290774620474895, |
| "loss": 1.0036, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.13795783542213155, |
| "grad_norm": 0.27281051874160767, |
| "learning_rate": 0.0001728298949007396, |
| "loss": 1.148, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.13834644904303897, |
| "grad_norm": 0.204191654920578, |
| "learning_rate": 0.00017275204359673026, |
| "loss": 0.9607, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.13873506266394636, |
| "grad_norm": 0.221976637840271, |
| "learning_rate": 0.0001726741922927209, |
| "loss": 1.1068, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.13912367628485378, |
| "grad_norm": 0.20831729471683502, |
| "learning_rate": 0.0001725963409887116, |
| "loss": 1.034, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.1395122899057612, |
| "grad_norm": 0.21639779210090637, |
| "learning_rate": 0.00017251848968470224, |
| "loss": 1.0613, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.13990090352666862, |
| "grad_norm": 0.1959424465894699, |
| "learning_rate": 0.0001724406383806929, |
| "loss": 1.0506, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.140289517147576, |
| "grad_norm": 0.2044398933649063, |
| "learning_rate": 0.00017236278707668355, |
| "loss": 1.0316, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.14067813076848343, |
| "grad_norm": 0.21483004093170166, |
| "learning_rate": 0.0001722849357726742, |
| "loss": 1.0361, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.14106674438939085, |
| "grad_norm": 0.237701416015625, |
| "learning_rate": 0.00017220708446866485, |
| "loss": 1.1264, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.14145535801029827, |
| "grad_norm": 0.20750795304775238, |
| "learning_rate": 0.00017212923316465553, |
| "loss": 1.0523, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "grad_norm": 0.2252965271472931, |
| "learning_rate": 0.00017205138186064618, |
| "loss": 1.0764, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.14223258525211308, |
| "grad_norm": 0.2033565789461136, |
| "learning_rate": 0.00017197353055663683, |
| "loss": 1.064, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.1426211988730205, |
| "grad_norm": 0.21123190224170685, |
| "learning_rate": 0.00017189567925262749, |
| "loss": 1.0515, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.1430098124939279, |
| "grad_norm": 0.20646221935749054, |
| "learning_rate": 0.00017181782794861814, |
| "loss": 1.0617, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.14339842611483533, |
| "grad_norm": 0.2079589068889618, |
| "learning_rate": 0.00017173997664460882, |
| "loss": 1.0569, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.14378703973574275, |
| "grad_norm": 0.216246098279953, |
| "learning_rate": 0.00017166212534059947, |
| "loss": 1.0986, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.14417565335665014, |
| "grad_norm": 0.20711806416511536, |
| "learning_rate": 0.00017158427403659012, |
| "loss": 1.1342, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.14456426697755756, |
| "grad_norm": 0.235435351729393, |
| "learning_rate": 0.00017150642273258077, |
| "loss": 1.1082, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.14495288059846498, |
| "grad_norm": 0.2273191511631012, |
| "learning_rate": 0.00017142857142857143, |
| "loss": 1.1064, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.1453414942193724, |
| "grad_norm": 0.2075672745704651, |
| "learning_rate": 0.0001713507201245621, |
| "loss": 1.0536, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.14573010784027982, |
| "grad_norm": 0.20764274895191193, |
| "learning_rate": 0.00017127286882055276, |
| "loss": 1.0673, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1461187214611872, |
| "grad_norm": 0.2441243678331375, |
| "learning_rate": 0.0001711950175165434, |
| "loss": 1.1271, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.14650733508209463, |
| "grad_norm": 0.2383374124765396, |
| "learning_rate": 0.00017111716621253406, |
| "loss": 1.083, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.14689594870300204, |
| "grad_norm": 0.2172410786151886, |
| "learning_rate": 0.0001710393149085247, |
| "loss": 1.0605, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.14728456232390946, |
| "grad_norm": 0.22591541707515717, |
| "learning_rate": 0.0001709614636045154, |
| "loss": 1.0931, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.14767317594481685, |
| "grad_norm": 0.23099495470523834, |
| "learning_rate": 0.00017088361230050604, |
| "loss": 1.1021, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.14806178956572427, |
| "grad_norm": 0.21461094915866852, |
| "learning_rate": 0.0001708057609964967, |
| "loss": 1.0959, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1484504031866317, |
| "grad_norm": 0.21557241678237915, |
| "learning_rate": 0.00017072790969248735, |
| "loss": 1.0155, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.1488390168075391, |
| "grad_norm": 0.234396293759346, |
| "learning_rate": 0.000170650058388478, |
| "loss": 1.1289, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.14922763042844653, |
| "grad_norm": 0.22895503044128418, |
| "learning_rate": 0.00017057220708446868, |
| "loss": 0.9919, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.14961624404935392, |
| "grad_norm": 0.2054683268070221, |
| "learning_rate": 0.00017049435578045933, |
| "loss": 1.0607, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.15000485767026134, |
| "grad_norm": 0.25569215416908264, |
| "learning_rate": 0.00017041650447644998, |
| "loss": 1.0517, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.15039347129116876, |
| "grad_norm": 0.2222641259431839, |
| "learning_rate": 0.00017033865317244064, |
| "loss": 1.0404, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.15078208491207618, |
| "grad_norm": 0.20501169562339783, |
| "learning_rate": 0.0001702608018684313, |
| "loss": 0.9897, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1511706985329836, |
| "grad_norm": 0.22080403566360474, |
| "learning_rate": 0.00017018295056442197, |
| "loss": 1.1013, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.15155931215389098, |
| "grad_norm": 0.21218529343605042, |
| "learning_rate": 0.00017010509926041262, |
| "loss": 1.0541, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1519479257747984, |
| "grad_norm": 0.23064807057380676, |
| "learning_rate": 0.00017002724795640327, |
| "loss": 1.037, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.15233653939570582, |
| "grad_norm": 0.21164493262767792, |
| "learning_rate": 0.00016994939665239392, |
| "loss": 1.0769, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.15272515301661324, |
| "grad_norm": 0.22565549612045288, |
| "learning_rate": 0.00016987154534838457, |
| "loss": 1.0638, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.15311376663752063, |
| "grad_norm": 0.22492647171020508, |
| "learning_rate": 0.00016979369404437525, |
| "loss": 1.063, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.15350238025842805, |
| "grad_norm": 0.22335395216941833, |
| "learning_rate": 0.0001697158427403659, |
| "loss": 1.1032, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.15389099387933547, |
| "grad_norm": 0.2164154201745987, |
| "learning_rate": 0.00016963799143635656, |
| "loss": 1.1275, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.1542796075002429, |
| "grad_norm": 0.22547736763954163, |
| "learning_rate": 0.0001695601401323472, |
| "loss": 1.1324, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.1546682211211503, |
| "grad_norm": 0.2028045952320099, |
| "learning_rate": 0.0001694822888283379, |
| "loss": 1.0057, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.1550568347420577, |
| "grad_norm": 0.20770573616027832, |
| "learning_rate": 0.00016940443752432854, |
| "loss": 1.0311, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.15544544836296512, |
| "grad_norm": 0.2231476902961731, |
| "learning_rate": 0.0001693265862203192, |
| "loss": 1.0535, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15583406198387253, |
| "grad_norm": 0.21618099510669708, |
| "learning_rate": 0.00016924873491630987, |
| "loss": 1.0616, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.15622267560477995, |
| "grad_norm": 0.24024419486522675, |
| "learning_rate": 0.00016917088361230052, |
| "loss": 1.1324, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.15661128922568737, |
| "grad_norm": 0.2002171128988266, |
| "learning_rate": 0.00016909303230829118, |
| "loss": 1.015, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.15699990284659476, |
| "grad_norm": 0.21771477162837982, |
| "learning_rate": 0.00016901518100428183, |
| "loss": 1.0817, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.15738851646750218, |
| "grad_norm": 0.22052259743213654, |
| "learning_rate": 0.0001689373297002725, |
| "loss": 1.0836, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.1577771300884096, |
| "grad_norm": 0.1964062750339508, |
| "learning_rate": 0.00016885947839626316, |
| "loss": 1.0505, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.15816574370931702, |
| "grad_norm": 0.22714298963546753, |
| "learning_rate": 0.0001687816270922538, |
| "loss": 1.0702, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.15855435733022444, |
| "grad_norm": 0.20647728443145752, |
| "learning_rate": 0.00016870377578824446, |
| "loss": 1.0349, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.15894297095113183, |
| "grad_norm": 0.2355160117149353, |
| "learning_rate": 0.00016862592448423512, |
| "loss": 1.0305, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.15933158457203925, |
| "grad_norm": 0.22890770435333252, |
| "learning_rate": 0.0001685480731802258, |
| "loss": 1.0854, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.15972019819294667, |
| "grad_norm": 0.21947838366031647, |
| "learning_rate": 0.00016847022187621645, |
| "loss": 1.0948, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.16010881181385409, |
| "grad_norm": 0.22334899008274078, |
| "learning_rate": 0.0001683923705722071, |
| "loss": 1.006, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.16049742543476148, |
| "grad_norm": 0.22324936091899872, |
| "learning_rate": 0.00016831451926819775, |
| "loss": 1.0402, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.1608860390556689, |
| "grad_norm": 0.21462097764015198, |
| "learning_rate": 0.0001682366679641884, |
| "loss": 1.077, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.1612746526765763, |
| "grad_norm": 0.24567006528377533, |
| "learning_rate": 0.00016815881666017908, |
| "loss": 1.15, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.16166326629748373, |
| "grad_norm": 0.26437243819236755, |
| "learning_rate": 0.00016808096535616973, |
| "loss": 1.1251, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.16205187991839115, |
| "grad_norm": 0.2217959761619568, |
| "learning_rate": 0.00016800311405216039, |
| "loss": 1.1103, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.16244049353929854, |
| "grad_norm": 0.24402475357055664, |
| "learning_rate": 0.00016792526274815104, |
| "loss": 1.0672, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.16282910716020596, |
| "grad_norm": 0.21609526872634888, |
| "learning_rate": 0.0001678474114441417, |
| "loss": 1.0291, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.16321772078111338, |
| "grad_norm": 0.20054642856121063, |
| "learning_rate": 0.00016776956014013237, |
| "loss": 1.0704, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1636063344020208, |
| "grad_norm": 0.22864869236946106, |
| "learning_rate": 0.00016769170883612302, |
| "loss": 1.0612, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.16399494802292822, |
| "grad_norm": 0.22651974856853485, |
| "learning_rate": 0.00016761385753211367, |
| "loss": 1.0749, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1643835616438356, |
| "grad_norm": 0.21587328612804413, |
| "learning_rate": 0.00016753600622810433, |
| "loss": 1.0398, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.16477217526474303, |
| "grad_norm": 0.1953774094581604, |
| "learning_rate": 0.00016745815492409498, |
| "loss": 1.0275, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.16516078888565044, |
| "grad_norm": 0.21803410351276398, |
| "learning_rate": 0.00016738030362008566, |
| "loss": 1.1219, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.16554940250655786, |
| "grad_norm": 0.2034682035446167, |
| "learning_rate": 0.0001673024523160763, |
| "loss": 1.0342, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.16593801612746525, |
| "grad_norm": 0.20135951042175293, |
| "learning_rate": 0.00016722460101206696, |
| "loss": 0.9802, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.16632662974837267, |
| "grad_norm": 0.23310376703739166, |
| "learning_rate": 0.0001671467497080576, |
| "loss": 1.0789, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1667152433692801, |
| "grad_norm": 0.21475404500961304, |
| "learning_rate": 0.00016706889840404827, |
| "loss": 1.0416, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.1671038569901875, |
| "grad_norm": 0.21661072969436646, |
| "learning_rate": 0.00016699104710003894, |
| "loss": 1.0568, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.16749247061109493, |
| "grad_norm": 0.20310629904270172, |
| "learning_rate": 0.0001669131957960296, |
| "loss": 0.9968, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.16788108423200232, |
| "grad_norm": 0.2596947252750397, |
| "learning_rate": 0.00016683534449202025, |
| "loss": 1.0478, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.16826969785290974, |
| "grad_norm": 0.22226987779140472, |
| "learning_rate": 0.0001667574931880109, |
| "loss": 1.0898, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.16865831147381716, |
| "grad_norm": 0.22499911487102509, |
| "learning_rate": 0.00016667964188400155, |
| "loss": 1.07, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.16904692509472458, |
| "grad_norm": 0.2717292308807373, |
| "learning_rate": 0.0001666017905799922, |
| "loss": 1.0562, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.169435538715632, |
| "grad_norm": 0.22052323818206787, |
| "learning_rate": 0.00016652393927598288, |
| "loss": 1.0732, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.16982415233653939, |
| "grad_norm": 0.21741728484630585, |
| "learning_rate": 0.00016644608797197354, |
| "loss": 1.0409, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 0.20701193809509277, |
| "learning_rate": 0.0001663682366679642, |
| "loss": 1.0731, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.17060137957835422, |
| "grad_norm": 0.22071130573749542, |
| "learning_rate": 0.00016629038536395484, |
| "loss": 1.0992, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.17098999319926164, |
| "grad_norm": 0.20261412858963013, |
| "learning_rate": 0.0001662125340599455, |
| "loss": 1.0051, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.17137860682016906, |
| "grad_norm": 0.2082947939634323, |
| "learning_rate": 0.00016613468275593617, |
| "loss": 1.0477, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.17176722044107645, |
| "grad_norm": 0.22534717619419098, |
| "learning_rate": 0.00016605683145192682, |
| "loss": 1.041, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.17215583406198387, |
| "grad_norm": 0.21547731757164001, |
| "learning_rate": 0.00016597898014791748, |
| "loss": 1.0528, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.1725444476828913, |
| "grad_norm": 0.24141089618206024, |
| "learning_rate": 0.00016590112884390813, |
| "loss": 1.0928, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.1729330613037987, |
| "grad_norm": 0.21910884976387024, |
| "learning_rate": 0.00016582327753989878, |
| "loss": 1.063, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.1733216749247061, |
| "grad_norm": 0.21782316267490387, |
| "learning_rate": 0.00016574542623588946, |
| "loss": 1.0976, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.17371028854561352, |
| "grad_norm": 0.21771778166294098, |
| "learning_rate": 0.0001656675749318801, |
| "loss": 1.0677, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.17409890216652094, |
| "grad_norm": 0.22117659449577332, |
| "learning_rate": 0.00016558972362787076, |
| "loss": 1.0669, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.17448751578742835, |
| "grad_norm": 0.21918092668056488, |
| "learning_rate": 0.00016551187232386141, |
| "loss": 1.0955, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.17487612940833577, |
| "grad_norm": 0.22027818858623505, |
| "learning_rate": 0.0001654340210198521, |
| "loss": 1.0201, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.17526474302924316, |
| "grad_norm": 0.2042885720729828, |
| "learning_rate": 0.00016535616971584275, |
| "loss": 1.0881, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.17565335665015058, |
| "grad_norm": 0.21788261830806732, |
| "learning_rate": 0.0001652783184118334, |
| "loss": 1.0918, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.176041970271058, |
| "grad_norm": 0.23332571983337402, |
| "learning_rate": 0.00016520046710782408, |
| "loss": 1.091, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.17643058389196542, |
| "grad_norm": 0.20204192399978638, |
| "learning_rate": 0.00016512261580381473, |
| "loss": 1.0366, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.17681919751287284, |
| "grad_norm": 0.21761906147003174, |
| "learning_rate": 0.00016504476449980538, |
| "loss": 1.0131, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.17720781113378023, |
| "grad_norm": 0.2152051478624344, |
| "learning_rate": 0.00016496691319579606, |
| "loss": 1.0868, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.17759642475468765, |
| "grad_norm": 0.22776494920253754, |
| "learning_rate": 0.0001648890618917867, |
| "loss": 1.0807, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.17798503837559507, |
| "grad_norm": 0.2171342968940735, |
| "learning_rate": 0.00016481121058777736, |
| "loss": 1.0537, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.17837365199650249, |
| "grad_norm": 0.2046273946762085, |
| "learning_rate": 0.00016473335928376802, |
| "loss": 1.0097, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.17876226561740988, |
| "grad_norm": 0.2047681361436844, |
| "learning_rate": 0.00016465550797975867, |
| "loss": 1.0204, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1791508792383173, |
| "grad_norm": 0.1876862645149231, |
| "learning_rate": 0.00016457765667574935, |
| "loss": 0.9383, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.17953949285922471, |
| "grad_norm": 0.218430757522583, |
| "learning_rate": 0.00016449980537174, |
| "loss": 1.0721, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.17992810648013213, |
| "grad_norm": 0.2245480865240097, |
| "learning_rate": 0.00016442195406773065, |
| "loss": 1.0859, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.18031672010103955, |
| "grad_norm": 0.22577151656150818, |
| "learning_rate": 0.0001643441027637213, |
| "loss": 1.0825, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.18070533372194694, |
| "grad_norm": 0.20132745802402496, |
| "learning_rate": 0.00016426625145971196, |
| "loss": 1.0615, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.18109394734285436, |
| "grad_norm": 0.2277505248785019, |
| "learning_rate": 0.00016418840015570263, |
| "loss": 1.0426, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.18148256096376178, |
| "grad_norm": 0.22540105879306793, |
| "learning_rate": 0.0001641105488516933, |
| "loss": 1.0481, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.1818711745846692, |
| "grad_norm": 0.20358088612556458, |
| "learning_rate": 0.00016403269754768394, |
| "loss": 1.0286, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.18225978820557662, |
| "grad_norm": 0.22534145414829254, |
| "learning_rate": 0.0001639548462436746, |
| "loss": 1.1183, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.182648401826484, |
| "grad_norm": 0.2188873142004013, |
| "learning_rate": 0.00016387699493966524, |
| "loss": 1.0439, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.18303701544739143, |
| "grad_norm": 0.2128048539161682, |
| "learning_rate": 0.00016379914363565592, |
| "loss": 1.027, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.18342562906829885, |
| "grad_norm": 0.2518141567707062, |
| "learning_rate": 0.00016372129233164657, |
| "loss": 1.0468, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.18381424268920626, |
| "grad_norm": 0.2189142256975174, |
| "learning_rate": 0.00016364344102763723, |
| "loss": 1.0581, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.18420285631011368, |
| "grad_norm": 0.31266725063323975, |
| "learning_rate": 0.00016356558972362788, |
| "loss": 1.0554, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.18459146993102107, |
| "grad_norm": 0.21343916654586792, |
| "learning_rate": 0.00016348773841961853, |
| "loss": 1.0795, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.1849800835519285, |
| "grad_norm": 0.22907280921936035, |
| "learning_rate": 0.00016340988711560918, |
| "loss": 1.0304, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.1853686971728359, |
| "grad_norm": 0.2105257511138916, |
| "learning_rate": 0.00016333203581159986, |
| "loss": 1.0231, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.18575731079374333, |
| "grad_norm": 0.19537831842899323, |
| "learning_rate": 0.00016325418450759051, |
| "loss": 1.0103, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.18614592441465072, |
| "grad_norm": 0.20522372424602509, |
| "learning_rate": 0.00016317633320358117, |
| "loss": 1.0196, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.18653453803555814, |
| "grad_norm": 0.21646477282047272, |
| "learning_rate": 0.00016309848189957182, |
| "loss": 1.0579, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.18692315165646556, |
| "grad_norm": 0.21077193319797516, |
| "learning_rate": 0.00016302063059556247, |
| "loss": 1.0638, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.18731176527737298, |
| "grad_norm": 0.20357473194599152, |
| "learning_rate": 0.00016294277929155315, |
| "loss": 1.0635, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.1877003788982804, |
| "grad_norm": 0.2188001275062561, |
| "learning_rate": 0.0001628649279875438, |
| "loss": 1.0267, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1880889925191878, |
| "grad_norm": 0.2128928154706955, |
| "learning_rate": 0.00016278707668353445, |
| "loss": 0.9706, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.1884776061400952, |
| "grad_norm": 0.22081372141838074, |
| "learning_rate": 0.0001627092253795251, |
| "loss": 1.08, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.18886621976100262, |
| "grad_norm": 0.2250615805387497, |
| "learning_rate": 0.00016263137407551576, |
| "loss": 1.1451, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.18925483338191004, |
| "grad_norm": 0.1984967589378357, |
| "learning_rate": 0.00016255352277150644, |
| "loss": 1.0744, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.18964344700281746, |
| "grad_norm": 0.20778900384902954, |
| "learning_rate": 0.0001624756714674971, |
| "loss": 1.0623, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.19003206062372485, |
| "grad_norm": 0.2026563137769699, |
| "learning_rate": 0.00016239782016348774, |
| "loss": 1.0714, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.19042067424463227, |
| "grad_norm": 0.21598374843597412, |
| "learning_rate": 0.0001623199688594784, |
| "loss": 1.0869, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1908092878655397, |
| "grad_norm": 0.18944978713989258, |
| "learning_rate": 0.00016224211755546904, |
| "loss": 1.055, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.1911979014864471, |
| "grad_norm": 0.20698946714401245, |
| "learning_rate": 0.00016216426625145972, |
| "loss": 1.0392, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.1915865151073545, |
| "grad_norm": 0.22395353019237518, |
| "learning_rate": 0.00016208641494745038, |
| "loss": 1.0681, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.19197512872826192, |
| "grad_norm": 0.22372962534427643, |
| "learning_rate": 0.00016200856364344103, |
| "loss": 1.0767, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.19236374234916934, |
| "grad_norm": 0.2066701054573059, |
| "learning_rate": 0.00016193071233943168, |
| "loss": 1.0061, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.19275235597007676, |
| "grad_norm": 0.19716408848762512, |
| "learning_rate": 0.00016185286103542233, |
| "loss": 1.039, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.19314096959098417, |
| "grad_norm": 0.22159601747989655, |
| "learning_rate": 0.000161775009731413, |
| "loss": 1.0832, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.19352958321189156, |
| "grad_norm": 0.21509626507759094, |
| "learning_rate": 0.00016169715842740366, |
| "loss": 1.0264, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.19391819683279898, |
| "grad_norm": 0.21598199009895325, |
| "learning_rate": 0.00016161930712339431, |
| "loss": 1.049, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.1943068104537064, |
| "grad_norm": 0.20279590785503387, |
| "learning_rate": 0.00016154145581938497, |
| "loss": 1.0505, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19469542407461382, |
| "grad_norm": 0.21796855330467224, |
| "learning_rate": 0.00016146360451537565, |
| "loss": 1.0885, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.19508403769552124, |
| "grad_norm": 0.22128933668136597, |
| "learning_rate": 0.0001613857532113663, |
| "loss": 1.0903, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.19547265131642863, |
| "grad_norm": 0.2032536417245865, |
| "learning_rate": 0.00016130790190735695, |
| "loss": 1.0285, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.19586126493733605, |
| "grad_norm": 0.23738974332809448, |
| "learning_rate": 0.0001612300506033476, |
| "loss": 1.1188, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.19624987855824347, |
| "grad_norm": 0.19614790380001068, |
| "learning_rate": 0.00016115219929933828, |
| "loss": 1.04, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.1966384921791509, |
| "grad_norm": 0.2198178917169571, |
| "learning_rate": 0.00016107434799532893, |
| "loss": 1.0696, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.1970271058000583, |
| "grad_norm": 0.18814648687839508, |
| "learning_rate": 0.00016099649669131959, |
| "loss": 1.0203, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.1974157194209657, |
| "grad_norm": 0.20699037611484528, |
| "learning_rate": 0.00016091864538731026, |
| "loss": 1.1074, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.19780433304187311, |
| "grad_norm": 0.21490445733070374, |
| "learning_rate": 0.00016084079408330092, |
| "loss": 1.0682, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.19819294666278053, |
| "grad_norm": 0.2363848090171814, |
| "learning_rate": 0.00016076294277929157, |
| "loss": 1.0408, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.19858156028368795, |
| "grad_norm": 0.20186659693717957, |
| "learning_rate": 0.00016068509147528222, |
| "loss": 1.026, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.19897017390459534, |
| "grad_norm": 0.21564024686813354, |
| "learning_rate": 0.00016060724017127287, |
| "loss": 1.0418, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.19935878752550276, |
| "grad_norm": 0.19151560962200165, |
| "learning_rate": 0.00016052938886726355, |
| "loss": 1.0037, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.19974740114641018, |
| "grad_norm": 0.21038194000720978, |
| "learning_rate": 0.0001604515375632542, |
| "loss": 1.0545, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.2001360147673176, |
| "grad_norm": 0.20496582984924316, |
| "learning_rate": 0.00016037368625924486, |
| "loss": 1.0543, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.20052462838822502, |
| "grad_norm": 0.20689113438129425, |
| "learning_rate": 0.0001602958349552355, |
| "loss": 1.0905, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2009132420091324, |
| "grad_norm": 0.2284041792154312, |
| "learning_rate": 0.00016021798365122616, |
| "loss": 1.0717, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.20130185563003983, |
| "grad_norm": 0.23457761108875275, |
| "learning_rate": 0.00016014013234721684, |
| "loss": 1.106, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.20169046925094725, |
| "grad_norm": 0.2088528722524643, |
| "learning_rate": 0.0001600622810432075, |
| "loss": 1.0428, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.20207908287185467, |
| "grad_norm": 0.2170068770647049, |
| "learning_rate": 0.00015998442973919814, |
| "loss": 0.9875, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.20246769649276208, |
| "grad_norm": 0.2270561158657074, |
| "learning_rate": 0.0001599065784351888, |
| "loss": 1.0676, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.20285631011366947, |
| "grad_norm": 0.2151324599981308, |
| "learning_rate": 0.00015982872713117945, |
| "loss": 1.0675, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2032449237345769, |
| "grad_norm": 0.23113249242305756, |
| "learning_rate": 0.00015975087582717013, |
| "loss": 1.0608, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2036335373554843, |
| "grad_norm": 0.2587106227874756, |
| "learning_rate": 0.00015967302452316078, |
| "loss": 1.0867, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.20402215097639173, |
| "grad_norm": 0.21842992305755615, |
| "learning_rate": 0.00015959517321915143, |
| "loss": 1.0726, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.20441076459729912, |
| "grad_norm": 0.20867805182933807, |
| "learning_rate": 0.00015951732191514208, |
| "loss": 1.0578, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.20479937821820654, |
| "grad_norm": 0.2396962195634842, |
| "learning_rate": 0.00015943947061113273, |
| "loss": 1.0292, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.20518799183911396, |
| "grad_norm": 0.221155047416687, |
| "learning_rate": 0.00015936161930712341, |
| "loss": 1.0019, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.20557660546002138, |
| "grad_norm": 0.20032119750976562, |
| "learning_rate": 0.00015928376800311407, |
| "loss": 1.0435, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2059652190809288, |
| "grad_norm": 0.24095888435840607, |
| "learning_rate": 0.00015920591669910472, |
| "loss": 1.0355, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2063538327018362, |
| "grad_norm": 0.2286604344844818, |
| "learning_rate": 0.00015912806539509537, |
| "loss": 0.9989, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.2067424463227436, |
| "grad_norm": 0.21537137031555176, |
| "learning_rate": 0.00015905021409108602, |
| "loss": 1.0642, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.20713105994365102, |
| "grad_norm": 0.22447925806045532, |
| "learning_rate": 0.0001589723627870767, |
| "loss": 1.1244, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.20751967356455844, |
| "grad_norm": 0.21077273786067963, |
| "learning_rate": 0.00015889451148306735, |
| "loss": 1.0167, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.20790828718546586, |
| "grad_norm": 0.22340558469295502, |
| "learning_rate": 0.000158816660179058, |
| "loss": 1.0991, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.20829690080637325, |
| "grad_norm": 0.223599374294281, |
| "learning_rate": 0.00015873880887504866, |
| "loss": 1.086, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.20868551442728067, |
| "grad_norm": 0.2615208923816681, |
| "learning_rate": 0.0001586609575710393, |
| "loss": 1.0584, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2090741280481881, |
| "grad_norm": 0.2085907757282257, |
| "learning_rate": 0.00015858310626703, |
| "loss": 1.0994, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.2094627416690955, |
| "grad_norm": 0.2170211672782898, |
| "learning_rate": 0.00015850525496302064, |
| "loss": 1.1105, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.20985135529000293, |
| "grad_norm": 0.21978625655174255, |
| "learning_rate": 0.0001584274036590113, |
| "loss": 1.002, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.21023996891091032, |
| "grad_norm": 0.23684021830558777, |
| "learning_rate": 0.00015834955235500194, |
| "loss": 1.1216, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.21062858253181774, |
| "grad_norm": 0.220269113779068, |
| "learning_rate": 0.0001582717010509926, |
| "loss": 1.0773, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.21101719615272516, |
| "grad_norm": 0.22447973489761353, |
| "learning_rate": 0.00015819384974698328, |
| "loss": 1.0941, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.21140580977363257, |
| "grad_norm": 0.22435730695724487, |
| "learning_rate": 0.00015811599844297393, |
| "loss": 1.0138, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.21179442339453997, |
| "grad_norm": 0.2230793684720993, |
| "learning_rate": 0.00015803814713896458, |
| "loss": 1.0343, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.21218303701544738, |
| "grad_norm": 0.23491905629634857, |
| "learning_rate": 0.00015796029583495523, |
| "loss": 1.11, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.2125716506363548, |
| "grad_norm": 0.213560551404953, |
| "learning_rate": 0.00015788244453094588, |
| "loss": 1.0615, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.21296026425726222, |
| "grad_norm": 0.21392837166786194, |
| "learning_rate": 0.00015780459322693654, |
| "loss": 1.0872, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.21334887787816964, |
| "grad_norm": 0.20007692277431488, |
| "learning_rate": 0.00015772674192292722, |
| "loss": 1.0394, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.21373749149907703, |
| "grad_norm": 0.1969841718673706, |
| "learning_rate": 0.00015764889061891787, |
| "loss": 1.0381, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21412610511998445, |
| "grad_norm": 0.21874025464057922, |
| "learning_rate": 0.00015757103931490852, |
| "loss": 1.0822, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.21451471874089187, |
| "grad_norm": 0.21824273467063904, |
| "learning_rate": 0.00015749318801089917, |
| "loss": 1.0802, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2149033323617993, |
| "grad_norm": 0.20942047238349915, |
| "learning_rate": 0.00015741533670688985, |
| "loss": 1.0634, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.2152919459827067, |
| "grad_norm": 0.1940152943134308, |
| "learning_rate": 0.0001573374854028805, |
| "loss": 1.0264, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2156805596036141, |
| "grad_norm": 0.19859059154987335, |
| "learning_rate": 0.00015725963409887115, |
| "loss": 0.9701, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.21606917322452152, |
| "grad_norm": 0.22239404916763306, |
| "learning_rate": 0.0001571817827948618, |
| "loss": 1.1282, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.21645778684542893, |
| "grad_norm": 0.23820599913597107, |
| "learning_rate": 0.00015710393149085249, |
| "loss": 1.1123, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.21684640046633635, |
| "grad_norm": 0.21279917657375336, |
| "learning_rate": 0.00015702608018684314, |
| "loss": 1.0542, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.21723501408724374, |
| "grad_norm": 0.2065514773130417, |
| "learning_rate": 0.0001569482288828338, |
| "loss": 1.0685, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.21762362770815116, |
| "grad_norm": 0.20130831003189087, |
| "learning_rate": 0.00015687037757882447, |
| "loss": 0.9869, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.21801224132905858, |
| "grad_norm": 0.2187541127204895, |
| "learning_rate": 0.00015679252627481512, |
| "loss": 1.1095, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.218400854949966, |
| "grad_norm": 0.21028277277946472, |
| "learning_rate": 0.00015671467497080577, |
| "loss": 1.0804, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.21878946857087342, |
| "grad_norm": 0.8187636733055115, |
| "learning_rate": 0.00015663682366679643, |
| "loss": 1.0782, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.2191780821917808, |
| "grad_norm": 0.20059974491596222, |
| "learning_rate": 0.0001565589723627871, |
| "loss": 1.0279, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.21956669581268823, |
| "grad_norm": 0.20440839231014252, |
| "learning_rate": 0.00015648112105877776, |
| "loss": 0.9863, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.21995530943359565, |
| "grad_norm": 0.21423624455928802, |
| "learning_rate": 0.0001564032697547684, |
| "loss": 1.0685, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.22034392305450307, |
| "grad_norm": 0.22430062294006348, |
| "learning_rate": 0.00015632541845075906, |
| "loss": 1.0761, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.22073253667541048, |
| "grad_norm": 0.22782258689403534, |
| "learning_rate": 0.0001562475671467497, |
| "loss": 1.1024, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.22112115029631788, |
| "grad_norm": 0.21150320768356323, |
| "learning_rate": 0.0001561697158427404, |
| "loss": 1.0621, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.2215097639172253, |
| "grad_norm": 0.20342351496219635, |
| "learning_rate": 0.00015609186453873104, |
| "loss": 1.0667, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.2218983775381327, |
| "grad_norm": 0.22866711020469666, |
| "learning_rate": 0.0001560140132347217, |
| "loss": 1.0631, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.22228699115904013, |
| "grad_norm": 0.2200063169002533, |
| "learning_rate": 0.00015593616193071235, |
| "loss": 1.0448, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.22267560477994755, |
| "grad_norm": 0.19440248608589172, |
| "learning_rate": 0.000155858310626703, |
| "loss": 1.037, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.22306421840085494, |
| "grad_norm": 0.205752432346344, |
| "learning_rate": 0.00015578045932269368, |
| "loss": 1.0465, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.22345283202176236, |
| "grad_norm": 0.22247998416423798, |
| "learning_rate": 0.00015570260801868433, |
| "loss": 0.997, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.22384144564266978, |
| "grad_norm": 0.22199274599552155, |
| "learning_rate": 0.00015562475671467498, |
| "loss": 1.0178, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2242300592635772, |
| "grad_norm": 0.2114989310503006, |
| "learning_rate": 0.00015554690541066564, |
| "loss": 1.0457, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2246186728844846, |
| "grad_norm": 0.24248506128787994, |
| "learning_rate": 0.0001554690541066563, |
| "loss": 1.002, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.225007286505392, |
| "grad_norm": 0.2565505802631378, |
| "learning_rate": 0.00015539120280264697, |
| "loss": 1.0541, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.22539590012629943, |
| "grad_norm": 0.22799409925937653, |
| "learning_rate": 0.00015531335149863762, |
| "loss": 1.0788, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.22578451374720684, |
| "grad_norm": 0.2196080982685089, |
| "learning_rate": 0.00015523550019462827, |
| "loss": 1.0877, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.22617312736811426, |
| "grad_norm": 0.21992824971675873, |
| "learning_rate": 0.00015515764889061892, |
| "loss": 1.0213, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.22656174098902165, |
| "grad_norm": 0.22793298959732056, |
| "learning_rate": 0.00015507979758660957, |
| "loss": 1.0633, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.22695035460992907, |
| "grad_norm": 0.21707972884178162, |
| "learning_rate": 0.00015500194628260023, |
| "loss": 1.081, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2273389682308365, |
| "grad_norm": 0.220685675740242, |
| "learning_rate": 0.0001549240949785909, |
| "loss": 1.0658, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.2277275818517439, |
| "grad_norm": 0.22576668858528137, |
| "learning_rate": 0.00015484624367458156, |
| "loss": 1.0795, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.22811619547265133, |
| "grad_norm": 0.21778982877731323, |
| "learning_rate": 0.0001547683923705722, |
| "loss": 1.033, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.22850480909355872, |
| "grad_norm": 0.22748610377311707, |
| "learning_rate": 0.00015469054106656286, |
| "loss": 1.0948, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.22889342271446614, |
| "grad_norm": 0.21561284363269806, |
| "learning_rate": 0.00015461268976255351, |
| "loss": 1.0022, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.22928203633537356, |
| "grad_norm": 0.2419756054878235, |
| "learning_rate": 0.0001545348384585442, |
| "loss": 1.0786, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.22967064995628098, |
| "grad_norm": 0.20479315519332886, |
| "learning_rate": 0.00015445698715453485, |
| "loss": 1.027, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2300592635771884, |
| "grad_norm": 0.21365883946418762, |
| "learning_rate": 0.0001543791358505255, |
| "loss": 1.0773, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.23044787719809579, |
| "grad_norm": 0.23133166134357452, |
| "learning_rate": 0.00015430128454651615, |
| "loss": 1.0877, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.2308364908190032, |
| "grad_norm": 0.2110515981912613, |
| "learning_rate": 0.0001542234332425068, |
| "loss": 1.0509, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.23122510443991062, |
| "grad_norm": 0.20658442378044128, |
| "learning_rate": 0.00015414558193849748, |
| "loss": 1.0623, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.23161371806081804, |
| "grad_norm": 0.21831996738910675, |
| "learning_rate": 0.00015406773063448813, |
| "loss": 1.021, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.23200233168172543, |
| "grad_norm": 0.23015642166137695, |
| "learning_rate": 0.00015398987933047878, |
| "loss": 1.0358, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.23239094530263285, |
| "grad_norm": 0.23071645200252533, |
| "learning_rate": 0.00015391202802646944, |
| "loss": 1.1255, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.23277955892354027, |
| "grad_norm": 0.19513486325740814, |
| "learning_rate": 0.0001538341767224601, |
| "loss": 1.0189, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2331681725444477, |
| "grad_norm": 0.20821452140808105, |
| "learning_rate": 0.00015375632541845077, |
| "loss": 1.0843, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2335567861653551, |
| "grad_norm": 0.20563223958015442, |
| "learning_rate": 0.00015367847411444142, |
| "loss": 1.0012, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.2339453997862625, |
| "grad_norm": 0.22674202919006348, |
| "learning_rate": 0.00015360062281043207, |
| "loss": 1.0371, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.23433401340716992, |
| "grad_norm": 0.20744135975837708, |
| "learning_rate": 0.00015352277150642272, |
| "loss": 1.0466, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.23472262702807734, |
| "grad_norm": 0.22103577852249146, |
| "learning_rate": 0.00015344492020241338, |
| "loss": 1.0942, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.23511124064898475, |
| "grad_norm": 0.20643098652362823, |
| "learning_rate": 0.00015336706889840406, |
| "loss": 1.0682, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.23549985426989217, |
| "grad_norm": 0.23436777293682098, |
| "learning_rate": 0.0001532892175943947, |
| "loss": 1.0613, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.23588846789079956, |
| "grad_norm": 0.21898899972438812, |
| "learning_rate": 0.00015321136629038536, |
| "loss": 1.0571, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.23627708151170698, |
| "grad_norm": 0.20569247007369995, |
| "learning_rate": 0.00015313351498637604, |
| "loss": 1.061, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2366656951326144, |
| "grad_norm": 0.2099207490682602, |
| "learning_rate": 0.0001530556636823667, |
| "loss": 1.0776, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.23705430875352182, |
| "grad_norm": 0.20078738033771515, |
| "learning_rate": 0.00015297781237835734, |
| "loss": 1.0341, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2374429223744292, |
| "grad_norm": 0.20327065885066986, |
| "learning_rate": 0.000152899961074348, |
| "loss": 1.0168, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.23783153599533663, |
| "grad_norm": 0.21741214394569397, |
| "learning_rate": 0.00015282210977033867, |
| "loss": 1.0726, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.23822014961624405, |
| "grad_norm": 0.2065727263689041, |
| "learning_rate": 0.00015274425846632933, |
| "loss": 1.0474, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.23860876323715147, |
| "grad_norm": 0.21241194009780884, |
| "learning_rate": 0.00015266640716231998, |
| "loss": 1.0666, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.23899737685805889, |
| "grad_norm": 0.2194201797246933, |
| "learning_rate": 0.00015258855585831066, |
| "loss": 1.1411, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.23938599047896628, |
| "grad_norm": 0.21537193655967712, |
| "learning_rate": 0.0001525107045543013, |
| "loss": 1.081, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2397746040998737, |
| "grad_norm": 0.21125951409339905, |
| "learning_rate": 0.00015243285325029196, |
| "loss": 1.0679, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.2401632177207811, |
| "grad_norm": 0.21342721581459045, |
| "learning_rate": 0.0001523550019462826, |
| "loss": 1.0564, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.24055183134168853, |
| "grad_norm": 0.2223503291606903, |
| "learning_rate": 0.00015227715064227327, |
| "loss": 1.1163, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.24094044496259595, |
| "grad_norm": 0.21626527607440948, |
| "learning_rate": 0.00015219929933826394, |
| "loss": 1.0793, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.24132905858350334, |
| "grad_norm": 0.21899500489234924, |
| "learning_rate": 0.0001521214480342546, |
| "loss": 1.0864, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.24171767220441076, |
| "grad_norm": 0.2499915212392807, |
| "learning_rate": 0.00015204359673024525, |
| "loss": 1.1381, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.24210628582531818, |
| "grad_norm": 0.2108345925807953, |
| "learning_rate": 0.0001519657454262359, |
| "loss": 1.0534, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.2424948994462256, |
| "grad_norm": 0.2224910855293274, |
| "learning_rate": 0.00015188789412222655, |
| "loss": 1.0235, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.24288351306713302, |
| "grad_norm": 0.22163094580173492, |
| "learning_rate": 0.0001518100428182172, |
| "loss": 1.0143, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.2432721266880404, |
| "grad_norm": 0.20709283649921417, |
| "learning_rate": 0.00015173219151420788, |
| "loss": 1.0506, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.24366074030894783, |
| "grad_norm": 0.2112802267074585, |
| "learning_rate": 0.00015165434021019854, |
| "loss": 1.0692, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.24404935392985525, |
| "grad_norm": 0.23622830212116241, |
| "learning_rate": 0.0001515764889061892, |
| "loss": 1.0769, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.24443796755076266, |
| "grad_norm": 0.23328271508216858, |
| "learning_rate": 0.00015149863760217984, |
| "loss": 1.1158, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.24482658117167005, |
| "grad_norm": 0.2071760892868042, |
| "learning_rate": 0.0001514207862981705, |
| "loss": 1.0133, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.24521519479257747, |
| "grad_norm": 0.21428920328617096, |
| "learning_rate": 0.00015134293499416117, |
| "loss": 1.0342, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2456038084134849, |
| "grad_norm": 0.22225375473499298, |
| "learning_rate": 0.00015126508369015182, |
| "loss": 1.1054, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.2459924220343923, |
| "grad_norm": 0.2096671611070633, |
| "learning_rate": 0.00015118723238614248, |
| "loss": 1.0229, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.24638103565529973, |
| "grad_norm": 0.21473252773284912, |
| "learning_rate": 0.00015110938108213313, |
| "loss": 1.0915, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.24676964927620712, |
| "grad_norm": 0.2071562111377716, |
| "learning_rate": 0.00015103152977812378, |
| "loss": 1.047, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.24715826289711454, |
| "grad_norm": 0.19868609309196472, |
| "learning_rate": 0.00015095367847411446, |
| "loss": 1.0073, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.24754687651802196, |
| "grad_norm": 0.20937366783618927, |
| "learning_rate": 0.0001508758271701051, |
| "loss": 1.0155, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.24793549013892938, |
| "grad_norm": 0.19225911796092987, |
| "learning_rate": 0.00015079797586609576, |
| "loss": 1.0163, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.2483241037598368, |
| "grad_norm": 0.20427283644676208, |
| "learning_rate": 0.00015072012456208641, |
| "loss": 1.062, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.24871271738074419, |
| "grad_norm": 0.21640253067016602, |
| "learning_rate": 0.00015064227325807707, |
| "loss": 1.025, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2491013310016516, |
| "grad_norm": 0.20416739583015442, |
| "learning_rate": 0.00015056442195406775, |
| "loss": 1.0635, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.24948994462255902, |
| "grad_norm": 0.1990521252155304, |
| "learning_rate": 0.0001504865706500584, |
| "loss": 1.0757, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.24987855824346644, |
| "grad_norm": 0.21636444330215454, |
| "learning_rate": 0.00015040871934604905, |
| "loss": 1.0441, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.25026717186437386, |
| "grad_norm": 0.21253719925880432, |
| "learning_rate": 0.0001503308680420397, |
| "loss": 1.0574, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2506557854852813, |
| "grad_norm": 0.2134159356355667, |
| "learning_rate": 0.00015025301673803035, |
| "loss": 1.0396, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.2510443991061887, |
| "grad_norm": 0.2018527239561081, |
| "learning_rate": 0.00015017516543402103, |
| "loss": 1.0606, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.25143301272709606, |
| "grad_norm": 0.20320741832256317, |
| "learning_rate": 0.00015009731413001169, |
| "loss": 1.0093, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.2518216263480035, |
| "grad_norm": 0.21007056534290314, |
| "learning_rate": 0.00015001946282600234, |
| "loss": 1.0284, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.2522102399689109, |
| "grad_norm": 0.22453372180461884, |
| "learning_rate": 0.000149941611521993, |
| "loss": 1.0271, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.2525988535898183, |
| "grad_norm": 0.19889335334300995, |
| "learning_rate": 0.00014986376021798364, |
| "loss": 1.0238, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.25298746721072574, |
| "grad_norm": 0.19339965283870697, |
| "learning_rate": 0.00014978590891397432, |
| "loss": 1.024, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.25337608083163315, |
| "grad_norm": 0.22362011671066284, |
| "learning_rate": 0.00014970805760996497, |
| "loss": 1.0722, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2537646944525406, |
| "grad_norm": 0.2110588103532791, |
| "learning_rate": 0.00014963020630595562, |
| "loss": 1.0541, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.254153308073448, |
| "grad_norm": 0.203025683760643, |
| "learning_rate": 0.00014955235500194628, |
| "loss": 1.0335, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.2545419216943554, |
| "grad_norm": 0.20884902775287628, |
| "learning_rate": 0.00014947450369793693, |
| "loss": 1.0507, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2549305353152628, |
| "grad_norm": 0.21234256029129028, |
| "learning_rate": 0.0001493966523939276, |
| "loss": 1.0372, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 0.1984352171421051, |
| "learning_rate": 0.00014931880108991826, |
| "loss": 0.9979, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.2557077625570776, |
| "grad_norm": 0.18848282098770142, |
| "learning_rate": 0.0001492409497859089, |
| "loss": 0.9973, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.25609637617798503, |
| "grad_norm": 0.2201709896326065, |
| "learning_rate": 0.00014916309848189956, |
| "loss": 1.0386, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.25648498979889245, |
| "grad_norm": 0.23094095289707184, |
| "learning_rate": 0.00014908524717789024, |
| "loss": 1.1205, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.25687360341979987, |
| "grad_norm": 0.21087734401226044, |
| "learning_rate": 0.0001490073958738809, |
| "loss": 1.0231, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.2572622170407073, |
| "grad_norm": 0.24970979988574982, |
| "learning_rate": 0.00014892954456987155, |
| "loss": 1.0421, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.2576508306616147, |
| "grad_norm": 0.22024711966514587, |
| "learning_rate": 0.00014885169326586223, |
| "loss": 1.1033, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.2580394442825221, |
| "grad_norm": 0.2195248156785965, |
| "learning_rate": 0.00014877384196185288, |
| "loss": 1.089, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.25842805790342954, |
| "grad_norm": 0.20236417651176453, |
| "learning_rate": 0.00014869599065784353, |
| "loss": 1.0196, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.2588166715243369, |
| "grad_norm": 0.21973329782485962, |
| "learning_rate": 0.00014861813935383418, |
| "loss": 1.0844, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.2592052851452443, |
| "grad_norm": 0.2069879174232483, |
| "learning_rate": 0.00014854028804982486, |
| "loss": 1.0312, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.25959389876615174, |
| "grad_norm": 0.2037455290555954, |
| "learning_rate": 0.00014846243674581551, |
| "loss": 1.0018, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.25998251238705916, |
| "grad_norm": 0.24176378548145294, |
| "learning_rate": 0.00014838458544180617, |
| "loss": 1.0749, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.2603711260079666, |
| "grad_norm": 0.2007879763841629, |
| "learning_rate": 0.00014830673413779682, |
| "loss": 1.0443, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.260759739628874, |
| "grad_norm": 0.23503245413303375, |
| "learning_rate": 0.00014822888283378747, |
| "loss": 1.0674, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.2611483532497814, |
| "grad_norm": 0.2166167050600052, |
| "learning_rate": 0.00014815103152977815, |
| "loss": 1.079, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.26153696687068884, |
| "grad_norm": 0.2293982058763504, |
| "learning_rate": 0.0001480731802257688, |
| "loss": 1.0517, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.26192558049159625, |
| "grad_norm": 0.21040330827236176, |
| "learning_rate": 0.00014799532892175945, |
| "loss": 1.0475, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.2623141941125036, |
| "grad_norm": 0.20750463008880615, |
| "learning_rate": 0.0001479174776177501, |
| "loss": 1.025, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.26270280773341104, |
| "grad_norm": 0.2748873233795166, |
| "learning_rate": 0.00014783962631374076, |
| "loss": 1.0212, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.26309142135431846, |
| "grad_norm": 0.19212333858013153, |
| "learning_rate": 0.00014776177500973144, |
| "loss": 1.0049, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.2634800349752259, |
| "grad_norm": 0.207731693983078, |
| "learning_rate": 0.0001476839237057221, |
| "loss": 1.0062, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.2638686485961333, |
| "grad_norm": 0.2177981585264206, |
| "learning_rate": 0.00014760607240171274, |
| "loss": 1.0489, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.2642572622170407, |
| "grad_norm": 0.23239290714263916, |
| "learning_rate": 0.0001475282210977034, |
| "loss": 1.0856, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.26464587583794813, |
| "grad_norm": 0.2033151388168335, |
| "learning_rate": 0.00014745036979369404, |
| "loss": 1.0389, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.26503448945885555, |
| "grad_norm": 0.20917408168315887, |
| "learning_rate": 0.00014737251848968472, |
| "loss": 1.1208, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.26542310307976297, |
| "grad_norm": 0.22075454890727997, |
| "learning_rate": 0.00014729466718567538, |
| "loss": 1.0435, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.26581171670067033, |
| "grad_norm": 0.23094993829727173, |
| "learning_rate": 0.00014721681588166603, |
| "loss": 1.0649, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.26620033032157775, |
| "grad_norm": 0.21209536492824554, |
| "learning_rate": 0.00014713896457765668, |
| "loss": 1.0578, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.26658894394248517, |
| "grad_norm": 0.21412219107151031, |
| "learning_rate": 0.00014706111327364733, |
| "loss": 1.1137, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.2669775575633926, |
| "grad_norm": 0.21175475418567657, |
| "learning_rate": 0.000146983261969638, |
| "loss": 1.023, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.2673661711843, |
| "grad_norm": 0.21968993544578552, |
| "learning_rate": 0.00014690541066562866, |
| "loss": 1.1183, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.2677547848052074, |
| "grad_norm": 0.20414218306541443, |
| "learning_rate": 0.00014682755936161932, |
| "loss": 1.078, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.26814339842611484, |
| "grad_norm": 0.18986597657203674, |
| "learning_rate": 0.00014674970805760997, |
| "loss": 1.0029, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.26853201204702226, |
| "grad_norm": 0.21215832233428955, |
| "learning_rate": 0.00014667185675360062, |
| "loss": 1.0759, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.2689206256679297, |
| "grad_norm": 0.2113744169473648, |
| "learning_rate": 0.0001465940054495913, |
| "loss": 1.1027, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.2693092392888371, |
| "grad_norm": 0.22010880708694458, |
| "learning_rate": 0.00014651615414558195, |
| "loss": 1.0984, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.26969785290974446, |
| "grad_norm": 0.203857421875, |
| "learning_rate": 0.0001464383028415726, |
| "loss": 1.0407, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.2700864665306519, |
| "grad_norm": 0.21120867133140564, |
| "learning_rate": 0.00014636045153756325, |
| "loss": 1.0521, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.2704750801515593, |
| "grad_norm": 0.20039112865924835, |
| "learning_rate": 0.0001462826002335539, |
| "loss": 1.0897, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.2708636937724667, |
| "grad_norm": 0.22893202304840088, |
| "learning_rate": 0.00014620474892954456, |
| "loss": 1.0903, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.27125230739337414, |
| "grad_norm": 0.19886267185211182, |
| "learning_rate": 0.00014612689762553524, |
| "loss": 1.0889, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.27164092101428156, |
| "grad_norm": 0.18892349302768707, |
| "learning_rate": 0.0001460490463215259, |
| "loss": 0.981, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.272029534635189, |
| "grad_norm": 0.20602507889270782, |
| "learning_rate": 0.00014597119501751654, |
| "loss": 1.0223, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2724181482560964, |
| "grad_norm": 0.21480505168437958, |
| "learning_rate": 0.0001458933437135072, |
| "loss": 1.0355, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.2728067618770038, |
| "grad_norm": 0.21011753380298615, |
| "learning_rate": 0.00014581549240949785, |
| "loss": 1.0613, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.2731953754979112, |
| "grad_norm": 0.19350819289684296, |
| "learning_rate": 0.00014573764110548853, |
| "loss": 1.0144, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.2735839891188186, |
| "grad_norm": 0.207548126578331, |
| "learning_rate": 0.00014565978980147918, |
| "loss": 1.0465, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.273972602739726, |
| "grad_norm": 0.22220565378665924, |
| "learning_rate": 0.00014558193849746983, |
| "loss": 1.1073, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.27436121636063343, |
| "grad_norm": 0.193622425198555, |
| "learning_rate": 0.00014550408719346048, |
| "loss": 1.0357, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.27474982998154085, |
| "grad_norm": 0.2067158818244934, |
| "learning_rate": 0.00014542623588945113, |
| "loss": 1.0502, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.27513844360244827, |
| "grad_norm": 0.2218742072582245, |
| "learning_rate": 0.0001453483845854418, |
| "loss": 0.9934, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.2755270572233557, |
| "grad_norm": 0.22316142916679382, |
| "learning_rate": 0.00014527053328143246, |
| "loss": 1.0707, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.2759156708442631, |
| "grad_norm": 0.21004025638103485, |
| "learning_rate": 0.00014519268197742312, |
| "loss": 1.0543, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2763042844651705, |
| "grad_norm": 0.22070440649986267, |
| "learning_rate": 0.00014511483067341377, |
| "loss": 1.0467, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.27669289808607794, |
| "grad_norm": 0.21463747322559357, |
| "learning_rate": 0.00014503697936940445, |
| "loss": 1.0793, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.2770815117069853, |
| "grad_norm": 0.23452533781528473, |
| "learning_rate": 0.0001449591280653951, |
| "loss": 1.043, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.2774701253278927, |
| "grad_norm": 0.2405795156955719, |
| "learning_rate": 0.00014488127676138575, |
| "loss": 1.0752, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.27785873894880014, |
| "grad_norm": 0.21546585857868195, |
| "learning_rate": 0.00014480342545737643, |
| "loss": 1.0834, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.27824735256970756, |
| "grad_norm": 0.22675828635692596, |
| "learning_rate": 0.00014472557415336708, |
| "loss": 1.055, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.278635966190615, |
| "grad_norm": 0.2117871195077896, |
| "learning_rate": 0.00014464772284935774, |
| "loss": 1.03, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.2790245798115224, |
| "grad_norm": 0.2193155735731125, |
| "learning_rate": 0.00014456987154534841, |
| "loss": 1.0073, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.2794131934324298, |
| "grad_norm": 0.21447965502738953, |
| "learning_rate": 0.00014449202024133907, |
| "loss": 1.0174, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.27980180705333724, |
| "grad_norm": 0.22867532074451447, |
| "learning_rate": 0.00014441416893732972, |
| "loss": 1.0948, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.28019042067424466, |
| "grad_norm": 0.21570557355880737, |
| "learning_rate": 0.00014433631763332037, |
| "loss": 1.0105, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.280579034295152, |
| "grad_norm": 0.20787014067173004, |
| "learning_rate": 0.00014425846632931102, |
| "loss": 1.0384, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.28096764791605944, |
| "grad_norm": 0.19924762845039368, |
| "learning_rate": 0.0001441806150253017, |
| "loss": 1.0653, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.28135626153696686, |
| "grad_norm": 0.1996215283870697, |
| "learning_rate": 0.00014410276372129235, |
| "loss": 1.0439, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.2817448751578743, |
| "grad_norm": 0.2054813802242279, |
| "learning_rate": 0.000144024912417283, |
| "loss": 0.9895, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.2821334887787817, |
| "grad_norm": 0.2268310785293579, |
| "learning_rate": 0.00014394706111327366, |
| "loss": 1.0993, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.2825221023996891, |
| "grad_norm": 0.19867680966854095, |
| "learning_rate": 0.0001438692098092643, |
| "loss": 0.985, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.28291071602059653, |
| "grad_norm": 0.21099598705768585, |
| "learning_rate": 0.000143791358505255, |
| "loss": 1.0333, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.28329932964150395, |
| "grad_norm": 0.22479215264320374, |
| "learning_rate": 0.00014371350720124564, |
| "loss": 1.0449, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "grad_norm": 0.22717688977718353, |
| "learning_rate": 0.0001436356558972363, |
| "loss": 1.0482, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2840765568833188, |
| "grad_norm": 0.20389345288276672, |
| "learning_rate": 0.00014355780459322695, |
| "loss": 0.956, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.28446517050422615, |
| "grad_norm": 0.21583619713783264, |
| "learning_rate": 0.0001434799532892176, |
| "loss": 1.0154, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.28485378412513357, |
| "grad_norm": 0.2219148874282837, |
| "learning_rate": 0.00014340210198520825, |
| "loss": 1.0553, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.285242397746041, |
| "grad_norm": 0.19920189678668976, |
| "learning_rate": 0.00014332425068119893, |
| "loss": 0.9881, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.2856310113669484, |
| "grad_norm": 0.2295670360326767, |
| "learning_rate": 0.00014324639937718958, |
| "loss": 1.0529, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.2860196249878558, |
| "grad_norm": 0.21271567046642303, |
| "learning_rate": 0.00014316854807318023, |
| "loss": 1.037, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.28640823860876324, |
| "grad_norm": 0.21304361522197723, |
| "learning_rate": 0.00014309069676917088, |
| "loss": 1.048, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.28679685222967066, |
| "grad_norm": 0.19902732968330383, |
| "learning_rate": 0.00014301284546516154, |
| "loss": 1.0306, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2871854658505781, |
| "grad_norm": 0.1995929330587387, |
| "learning_rate": 0.00014293499416115222, |
| "loss": 1.0394, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.2875740794714855, |
| "grad_norm": 0.20426060259342194, |
| "learning_rate": 0.00014285714285714287, |
| "loss": 1.0052, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.28796269309239286, |
| "grad_norm": 0.20284566283226013, |
| "learning_rate": 0.00014277929155313352, |
| "loss": 1.0115, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.2883513067133003, |
| "grad_norm": 0.2041557878255844, |
| "learning_rate": 0.00014270144024912417, |
| "loss": 1.0473, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.2887399203342077, |
| "grad_norm": 0.2152249962091446, |
| "learning_rate": 0.00014262358894511482, |
| "loss": 1.0802, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.2891285339551151, |
| "grad_norm": 0.20569871366024017, |
| "learning_rate": 0.0001425457376411055, |
| "loss": 1.0203, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.28951714757602254, |
| "grad_norm": 0.21128378808498383, |
| "learning_rate": 0.00014246788633709616, |
| "loss": 1.108, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.28990576119692996, |
| "grad_norm": 0.19587135314941406, |
| "learning_rate": 0.0001423900350330868, |
| "loss": 1.0427, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.2902943748178374, |
| "grad_norm": 0.22052550315856934, |
| "learning_rate": 0.00014231218372907746, |
| "loss": 1.055, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.2906829884387448, |
| "grad_norm": 0.21291717886924744, |
| "learning_rate": 0.0001422343324250681, |
| "loss": 1.0591, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.2910716020596522, |
| "grad_norm": 0.20634084939956665, |
| "learning_rate": 0.0001421564811210588, |
| "loss": 1.0527, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.29146021568055963, |
| "grad_norm": 0.2075488269329071, |
| "learning_rate": 0.00014207862981704944, |
| "loss": 1.0786, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.291848829301467, |
| "grad_norm": 0.19780080020427704, |
| "learning_rate": 0.0001420007785130401, |
| "loss": 1.059, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.2922374429223744, |
| "grad_norm": 0.21212074160575867, |
| "learning_rate": 0.00014192292720903075, |
| "loss": 1.0346, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.29262605654328183, |
| "grad_norm": 0.2218451350927353, |
| "learning_rate": 0.0001418450759050214, |
| "loss": 1.0908, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.29301467016418925, |
| "grad_norm": 0.20107759535312653, |
| "learning_rate": 0.00014176722460101208, |
| "loss": 1.0202, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.29340328378509667, |
| "grad_norm": 0.20933273434638977, |
| "learning_rate": 0.00014168937329700273, |
| "loss": 1.0719, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.2937918974060041, |
| "grad_norm": 0.22369107604026794, |
| "learning_rate": 0.00014161152199299338, |
| "loss": 1.0433, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.2941805110269115, |
| "grad_norm": 0.2113707810640335, |
| "learning_rate": 0.00014153367068898403, |
| "loss": 1.0637, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.2945691246478189, |
| "grad_norm": 0.21105700731277466, |
| "learning_rate": 0.00014145581938497469, |
| "loss": 1.0468, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.29495773826872634, |
| "grad_norm": 0.20189693570137024, |
| "learning_rate": 0.00014137796808096537, |
| "loss": 1.0281, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.2953463518896337, |
| "grad_norm": 0.1954152137041092, |
| "learning_rate": 0.00014130011677695602, |
| "loss": 1.0519, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2957349655105411, |
| "grad_norm": 0.24295592308044434, |
| "learning_rate": 0.00014122226547294667, |
| "loss": 1.1303, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.29612357913144854, |
| "grad_norm": 0.20158620178699493, |
| "learning_rate": 0.00014114441416893732, |
| "loss": 1.0367, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.29651219275235596, |
| "grad_norm": 0.20734666287899017, |
| "learning_rate": 0.00014106656286492797, |
| "loss": 1.0392, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.2969008063732634, |
| "grad_norm": 0.2177533656358719, |
| "learning_rate": 0.00014098871156091865, |
| "loss": 1.0619, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.2972894199941708, |
| "grad_norm": 0.1961720883846283, |
| "learning_rate": 0.0001409108602569093, |
| "loss": 0.9872, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.2976780336150782, |
| "grad_norm": 0.21530941128730774, |
| "learning_rate": 0.00014083300895289996, |
| "loss": 1.1246, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.29806664723598564, |
| "grad_norm": 0.2039783000946045, |
| "learning_rate": 0.00014075515764889064, |
| "loss": 1.0789, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.29845526085689306, |
| "grad_norm": 0.20641569793224335, |
| "learning_rate": 0.0001406773063448813, |
| "loss": 1.05, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.2988438744778004, |
| "grad_norm": 0.2071225494146347, |
| "learning_rate": 0.00014059945504087194, |
| "loss": 1.047, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.29923248809870784, |
| "grad_norm": 0.20367531478405, |
| "learning_rate": 0.00014052160373686262, |
| "loss": 1.0734, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.29962110171961526, |
| "grad_norm": 0.21718619763851166, |
| "learning_rate": 0.00014044375243285327, |
| "loss": 1.0613, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.3000097153405227, |
| "grad_norm": 0.21649087965488434, |
| "learning_rate": 0.00014036590112884392, |
| "loss": 1.0671, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.3003983289614301, |
| "grad_norm": 0.22223225235939026, |
| "learning_rate": 0.00014028804982483458, |
| "loss": 1.0977, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3007869425823375, |
| "grad_norm": 0.23101870715618134, |
| "learning_rate": 0.00014021019852082523, |
| "loss": 1.1236, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.30117555620324493, |
| "grad_norm": 0.22855506837368011, |
| "learning_rate": 0.0001401323472168159, |
| "loss": 1.0517, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.30156416982415235, |
| "grad_norm": 0.20862117409706116, |
| "learning_rate": 0.00014005449591280656, |
| "loss": 1.0493, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.30195278344505977, |
| "grad_norm": 0.21692048013210297, |
| "learning_rate": 0.0001399766446087972, |
| "loss": 1.0681, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.3023413970659672, |
| "grad_norm": 0.21541331708431244, |
| "learning_rate": 0.00013989879330478786, |
| "loss": 1.0775, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.30273001068687455, |
| "grad_norm": 0.21221749484539032, |
| "learning_rate": 0.00013982094200077851, |
| "loss": 1.0421, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.30311862430778197, |
| "grad_norm": 0.22497743368148804, |
| "learning_rate": 0.0001397430906967692, |
| "loss": 1.1115, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3035072379286894, |
| "grad_norm": 0.1974119246006012, |
| "learning_rate": 0.00013966523939275985, |
| "loss": 1.0264, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.3038958515495968, |
| "grad_norm": 0.20349323749542236, |
| "learning_rate": 0.0001395873880887505, |
| "loss": 1.0512, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3042844651705042, |
| "grad_norm": 0.21116937696933746, |
| "learning_rate": 0.00013950953678474115, |
| "loss": 1.0135, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.30467307879141164, |
| "grad_norm": 0.2133677899837494, |
| "learning_rate": 0.0001394316854807318, |
| "loss": 1.0694, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.30506169241231906, |
| "grad_norm": 0.20406191051006317, |
| "learning_rate": 0.00013935383417672248, |
| "loss": 1.0179, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3054503060332265, |
| "grad_norm": 0.21428678929805756, |
| "learning_rate": 0.00013927598287271313, |
| "loss": 1.0577, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3058389196541339, |
| "grad_norm": 0.20878921449184418, |
| "learning_rate": 0.00013919813156870379, |
| "loss": 1.0311, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.30622753327504126, |
| "grad_norm": 0.19033175706863403, |
| "learning_rate": 0.00013912028026469444, |
| "loss": 0.976, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.3066161468959487, |
| "grad_norm": 0.22138020396232605, |
| "learning_rate": 0.0001390424289606851, |
| "loss": 1.0438, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.3070047605168561, |
| "grad_norm": 0.20765596628189087, |
| "learning_rate": 0.00013896457765667577, |
| "loss": 1.0865, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3073933741377635, |
| "grad_norm": 0.209733247756958, |
| "learning_rate": 0.00013888672635266642, |
| "loss": 1.0648, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.30778198775867094, |
| "grad_norm": 0.1896686851978302, |
| "learning_rate": 0.00013880887504865707, |
| "loss": 1.0133, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.30817060137957836, |
| "grad_norm": 0.21651998162269592, |
| "learning_rate": 0.00013873102374464772, |
| "loss": 1.0729, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3085592150004858, |
| "grad_norm": 0.21751996874809265, |
| "learning_rate": 0.00013865317244063838, |
| "loss": 1.0444, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3089478286213932, |
| "grad_norm": 0.20593520998954773, |
| "learning_rate": 0.00013857532113662906, |
| "loss": 1.0304, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3093364422423006, |
| "grad_norm": 0.19937261939048767, |
| "learning_rate": 0.0001384974698326197, |
| "loss": 1.0017, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.30972505586320803, |
| "grad_norm": 0.18901696801185608, |
| "learning_rate": 0.00013841961852861036, |
| "loss": 1.0362, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3101136694841154, |
| "grad_norm": 0.2079760730266571, |
| "learning_rate": 0.000138341767224601, |
| "loss": 1.0784, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3105022831050228, |
| "grad_norm": 0.24873265624046326, |
| "learning_rate": 0.00013826391592059166, |
| "loss": 1.1026, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.31089089672593023, |
| "grad_norm": 0.20185396075248718, |
| "learning_rate": 0.00013818606461658234, |
| "loss": 1.0235, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.31127951034683765, |
| "grad_norm": 0.211393803358078, |
| "learning_rate": 0.000138108213312573, |
| "loss": 1.0999, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.31166812396774507, |
| "grad_norm": 0.19948823750019073, |
| "learning_rate": 0.00013803036200856365, |
| "loss": 1.0242, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3120567375886525, |
| "grad_norm": 0.21470944583415985, |
| "learning_rate": 0.0001379525107045543, |
| "loss": 1.0736, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.3124453512095599, |
| "grad_norm": 0.2195902317762375, |
| "learning_rate": 0.00013787465940054495, |
| "loss": 1.0368, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3128339648304673, |
| "grad_norm": 0.22142355144023895, |
| "learning_rate": 0.00013779680809653563, |
| "loss": 1.1022, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.31322257845137474, |
| "grad_norm": 0.20487886667251587, |
| "learning_rate": 0.00013771895679252628, |
| "loss": 1.0478, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.3136111920722821, |
| "grad_norm": 0.217549130320549, |
| "learning_rate": 0.00013764110548851693, |
| "loss": 1.0526, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.3139998056931895, |
| "grad_norm": 0.20199982821941376, |
| "learning_rate": 0.0001375632541845076, |
| "loss": 0.9992, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.31438841931409695, |
| "grad_norm": 0.19496634602546692, |
| "learning_rate": 0.00013748540288049824, |
| "loss": 1.0179, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.31477703293500436, |
| "grad_norm": 0.21999460458755493, |
| "learning_rate": 0.0001374075515764889, |
| "loss": 1.0547, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3151656465559118, |
| "grad_norm": 0.21421074867248535, |
| "learning_rate": 0.00013732970027247957, |
| "loss": 1.0283, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.3155542601768192, |
| "grad_norm": 0.1913364827632904, |
| "learning_rate": 0.00013725184896847022, |
| "loss": 0.9826, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3159428737977266, |
| "grad_norm": 0.20509806275367737, |
| "learning_rate": 0.00013717399766446087, |
| "loss": 1.0303, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.31633148741863404, |
| "grad_norm": 0.20309868454933167, |
| "learning_rate": 0.00013709614636045153, |
| "loss": 1.0479, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.31672010103954146, |
| "grad_norm": 0.2274443656206131, |
| "learning_rate": 0.0001370182950564422, |
| "loss": 1.1311, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3171087146604489, |
| "grad_norm": 0.22785170376300812, |
| "learning_rate": 0.00013694044375243286, |
| "loss": 1.1009, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.31749732828135624, |
| "grad_norm": 0.2105439007282257, |
| "learning_rate": 0.0001368625924484235, |
| "loss": 1.0251, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.31788594190226366, |
| "grad_norm": 0.20583970844745636, |
| "learning_rate": 0.00013678474114441416, |
| "loss": 1.0833, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.3182745555231711, |
| "grad_norm": 0.21091191470623016, |
| "learning_rate": 0.00013670688984040484, |
| "loss": 1.071, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.3186631691440785, |
| "grad_norm": 0.20645928382873535, |
| "learning_rate": 0.0001366290385363955, |
| "loss": 1.0605, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3190517827649859, |
| "grad_norm": 0.1990513950586319, |
| "learning_rate": 0.00013655118723238614, |
| "loss": 1.0461, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.31944039638589333, |
| "grad_norm": 0.2192249745130539, |
| "learning_rate": 0.00013647333592837682, |
| "loss": 1.0975, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.31982901000680075, |
| "grad_norm": 0.2157617211341858, |
| "learning_rate": 0.00013639548462436748, |
| "loss": 1.091, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.32021762362770817, |
| "grad_norm": 0.21964526176452637, |
| "learning_rate": 0.00013631763332035813, |
| "loss": 1.0286, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.3206062372486156, |
| "grad_norm": 0.2079797089099884, |
| "learning_rate": 0.00013623978201634878, |
| "loss": 1.0257, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.32099485086952295, |
| "grad_norm": 0.21220168471336365, |
| "learning_rate": 0.00013616193071233946, |
| "loss": 1.0046, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.32138346449043037, |
| "grad_norm": 0.2885231673717499, |
| "learning_rate": 0.0001360840794083301, |
| "loss": 1.1442, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.3217720781113378, |
| "grad_norm": 0.2096511274576187, |
| "learning_rate": 0.00013600622810432076, |
| "loss": 1.0209, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.3221606917322452, |
| "grad_norm": 0.2179451286792755, |
| "learning_rate": 0.00013592837680031142, |
| "loss": 1.0548, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.3225493053531526, |
| "grad_norm": 0.2096329927444458, |
| "learning_rate": 0.00013585052549630207, |
| "loss": 1.0279, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.32293791897406005, |
| "grad_norm": 0.22531811892986298, |
| "learning_rate": 0.00013577267419229275, |
| "loss": 1.0463, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.32332653259496746, |
| "grad_norm": 0.22516901791095734, |
| "learning_rate": 0.0001356948228882834, |
| "loss": 1.1127, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.3237151462158749, |
| "grad_norm": 0.22487780451774597, |
| "learning_rate": 0.00013561697158427405, |
| "loss": 1.0707, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3241037598367823, |
| "grad_norm": 0.20976543426513672, |
| "learning_rate": 0.0001355391202802647, |
| "loss": 1.0217, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.32449237345768966, |
| "grad_norm": 0.19849295914173126, |
| "learning_rate": 0.00013546126897625535, |
| "loss": 1.021, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.3248809870785971, |
| "grad_norm": 0.21772268414497375, |
| "learning_rate": 0.00013538341767224603, |
| "loss": 1.0605, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.3252696006995045, |
| "grad_norm": 0.19670265913009644, |
| "learning_rate": 0.00013530556636823669, |
| "loss": 1.0165, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3256582143204119, |
| "grad_norm": 0.19339734315872192, |
| "learning_rate": 0.00013522771506422734, |
| "loss": 1.0203, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.32604682794131934, |
| "grad_norm": 0.21289557218551636, |
| "learning_rate": 0.000135149863760218, |
| "loss": 1.0252, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.32643544156222676, |
| "grad_norm": 0.1964789777994156, |
| "learning_rate": 0.00013507201245620864, |
| "loss": 1.0392, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3268240551831342, |
| "grad_norm": 0.20783716440200806, |
| "learning_rate": 0.00013499416115219932, |
| "loss": 1.0569, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3272126688040416, |
| "grad_norm": 0.22782161831855774, |
| "learning_rate": 0.00013491630984818997, |
| "loss": 1.0555, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.327601282424949, |
| "grad_norm": 0.22771142423152924, |
| "learning_rate": 0.00013483845854418063, |
| "loss": 1.085, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.32798989604585643, |
| "grad_norm": 0.19773711264133453, |
| "learning_rate": 0.00013476060724017128, |
| "loss": 1.008, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.3283785096667638, |
| "grad_norm": 0.22399166226387024, |
| "learning_rate": 0.00013468275593616193, |
| "loss": 1.0511, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3287671232876712, |
| "grad_norm": 0.20488236844539642, |
| "learning_rate": 0.00013460490463215258, |
| "loss": 1.0883, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.32915573690857863, |
| "grad_norm": 0.21387654542922974, |
| "learning_rate": 0.00013452705332814326, |
| "loss": 1.0808, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.32954435052948605, |
| "grad_norm": 0.1972568780183792, |
| "learning_rate": 0.0001344492020241339, |
| "loss": 1.0555, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.32993296415039347, |
| "grad_norm": 0.20835663378238678, |
| "learning_rate": 0.00013437135072012456, |
| "loss": 1.0473, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3303215777713009, |
| "grad_norm": 0.19707520306110382, |
| "learning_rate": 0.00013429349941611522, |
| "loss": 0.9585, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3307101913922083, |
| "grad_norm": 0.19163411855697632, |
| "learning_rate": 0.00013421564811210587, |
| "loss": 1.0025, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.3310988050131157, |
| "grad_norm": 0.19730083644390106, |
| "learning_rate": 0.00013413779680809655, |
| "loss": 1.0696, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.33148741863402315, |
| "grad_norm": 0.19537493586540222, |
| "learning_rate": 0.0001340599455040872, |
| "loss": 1.0466, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3318760322549305, |
| "grad_norm": 0.2255164235830307, |
| "learning_rate": 0.00013398209420007785, |
| "loss": 1.0659, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3322646458758379, |
| "grad_norm": 0.19774770736694336, |
| "learning_rate": 0.0001339042428960685, |
| "loss": 1.0326, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.33265325949674535, |
| "grad_norm": 0.2004510909318924, |
| "learning_rate": 0.00013382639159205916, |
| "loss": 1.0327, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.33304187311765276, |
| "grad_norm": 0.19187591969966888, |
| "learning_rate": 0.00013374854028804984, |
| "loss": 1.0069, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3334304867385602, |
| "grad_norm": 0.18775832653045654, |
| "learning_rate": 0.0001336706889840405, |
| "loss": 1.0083, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.3338191003594676, |
| "grad_norm": 0.2005717158317566, |
| "learning_rate": 0.00013359283768003114, |
| "loss": 1.0398, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.334207713980375, |
| "grad_norm": 0.19705893099308014, |
| "learning_rate": 0.0001335149863760218, |
| "loss": 1.0031, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.33459632760128244, |
| "grad_norm": 0.19589562714099884, |
| "learning_rate": 0.00013343713507201244, |
| "loss": 0.9831, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.33498494122218986, |
| "grad_norm": 0.19302591681480408, |
| "learning_rate": 0.00013335928376800312, |
| "loss": 1.0009, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.3353735548430973, |
| "grad_norm": 0.20499618351459503, |
| "learning_rate": 0.00013328143246399377, |
| "loss": 1.0205, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.33576216846400464, |
| "grad_norm": 0.20514456927776337, |
| "learning_rate": 0.00013320358115998443, |
| "loss": 1.0837, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.33615078208491206, |
| "grad_norm": 0.19285848736763, |
| "learning_rate": 0.00013312572985597508, |
| "loss": 1.0167, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3365393957058195, |
| "grad_norm": 0.20891553163528442, |
| "learning_rate": 0.00013304787855196573, |
| "loss": 1.0127, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.3369280093267269, |
| "grad_norm": 0.20511706173419952, |
| "learning_rate": 0.0001329700272479564, |
| "loss": 0.964, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.3373166229476343, |
| "grad_norm": 0.1855512261390686, |
| "learning_rate": 0.00013289217594394706, |
| "loss": 0.9721, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.33770523656854173, |
| "grad_norm": 0.20010098814964294, |
| "learning_rate": 0.00013281432463993771, |
| "loss": 1.0411, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.33809385018944915, |
| "grad_norm": 0.1991325318813324, |
| "learning_rate": 0.0001327364733359284, |
| "loss": 0.9658, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.33848246381035657, |
| "grad_norm": 0.19895736873149872, |
| "learning_rate": 0.00013265862203191905, |
| "loss": 1.0744, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.338871077431264, |
| "grad_norm": 0.2091255635023117, |
| "learning_rate": 0.0001325807707279097, |
| "loss": 1.0375, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.33925969105217135, |
| "grad_norm": 0.21355532109737396, |
| "learning_rate": 0.00013250291942390035, |
| "loss": 1.09, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.33964830467307877, |
| "grad_norm": 0.21844851970672607, |
| "learning_rate": 0.00013242506811989103, |
| "loss": 1.0769, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.3400369182939862, |
| "grad_norm": 0.1877543330192566, |
| "learning_rate": 0.00013234721681588168, |
| "loss": 1.0199, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 0.2020038366317749, |
| "learning_rate": 0.00013226936551187233, |
| "loss": 1.0218, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.340814145535801, |
| "grad_norm": 0.20682141184806824, |
| "learning_rate": 0.000132191514207863, |
| "loss": 1.0891, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.34120275915670845, |
| "grad_norm": 0.21942824125289917, |
| "learning_rate": 0.00013211366290385366, |
| "loss": 0.9877, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.34159137277761586, |
| "grad_norm": 0.21150313317775726, |
| "learning_rate": 0.00013203581159984432, |
| "loss": 1.0815, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.3419799863985233, |
| "grad_norm": 0.2073293924331665, |
| "learning_rate": 0.00013195796029583497, |
| "loss": 1.0579, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3423686000194307, |
| "grad_norm": 0.221574068069458, |
| "learning_rate": 0.00013188010899182562, |
| "loss": 1.0279, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.3427572136403381, |
| "grad_norm": 0.22334492206573486, |
| "learning_rate": 0.00013180225768781627, |
| "loss": 1.0837, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.3431458272612455, |
| "grad_norm": 0.18817654252052307, |
| "learning_rate": 0.00013172440638380695, |
| "loss": 1.0262, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.3435344408821529, |
| "grad_norm": 0.20126822590827942, |
| "learning_rate": 0.0001316465550797976, |
| "loss": 1.0679, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.3439230545030603, |
| "grad_norm": 0.2128864973783493, |
| "learning_rate": 0.00013156870377578825, |
| "loss": 1.0316, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.34431166812396774, |
| "grad_norm": 0.20054499804973602, |
| "learning_rate": 0.0001314908524717789, |
| "loss": 1.0024, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.34470028174487516, |
| "grad_norm": 0.21358034014701843, |
| "learning_rate": 0.00013141300116776956, |
| "loss": 1.0475, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.3450888953657826, |
| "grad_norm": 0.21377703547477722, |
| "learning_rate": 0.00013133514986376024, |
| "loss": 1.0957, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.34547750898669, |
| "grad_norm": 0.20166514813899994, |
| "learning_rate": 0.0001312572985597509, |
| "loss": 1.0189, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.3458661226075974, |
| "grad_norm": 0.20424878597259521, |
| "learning_rate": 0.00013117944725574154, |
| "loss": 1.0896, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.34625473622850483, |
| "grad_norm": 0.19028648734092712, |
| "learning_rate": 0.0001311015959517322, |
| "loss": 0.9881, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.3466433498494122, |
| "grad_norm": 0.20828665792942047, |
| "learning_rate": 0.00013102374464772285, |
| "loss": 0.9932, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.3470319634703196, |
| "grad_norm": 0.20756572484970093, |
| "learning_rate": 0.00013094589334371353, |
| "loss": 1.0406, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.34742057709122703, |
| "grad_norm": 0.20768921077251434, |
| "learning_rate": 0.00013086804203970418, |
| "loss": 0.9652, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.34780919071213445, |
| "grad_norm": 0.20660027861595154, |
| "learning_rate": 0.00013079019073569483, |
| "loss": 1.0728, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.34819780433304187, |
| "grad_norm": 0.20186837017536163, |
| "learning_rate": 0.00013071233943168548, |
| "loss": 1.0407, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3485864179539493, |
| "grad_norm": 0.20880667865276337, |
| "learning_rate": 0.00013063448812767613, |
| "loss": 1.0275, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.3489750315748567, |
| "grad_norm": 0.22212949395179749, |
| "learning_rate": 0.0001305566368236668, |
| "loss": 1.0293, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.3493636451957641, |
| "grad_norm": 0.20552745461463928, |
| "learning_rate": 0.00013047878551965746, |
| "loss": 1.0434, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.34975225881667155, |
| "grad_norm": 0.21239839494228363, |
| "learning_rate": 0.00013040093421564812, |
| "loss": 1.052, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3501408724375789, |
| "grad_norm": 0.22420544922351837, |
| "learning_rate": 0.00013032308291163877, |
| "loss": 1.0236, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.35052948605848633, |
| "grad_norm": 0.23435090482234955, |
| "learning_rate": 0.00013024523160762942, |
| "loss": 1.0876, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.35091809967939375, |
| "grad_norm": 0.22763386368751526, |
| "learning_rate": 0.0001301673803036201, |
| "loss": 1.0636, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.35130671330030117, |
| "grad_norm": 0.20948883891105652, |
| "learning_rate": 0.00013008952899961075, |
| "loss": 1.0083, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.3516953269212086, |
| "grad_norm": 0.20408779382705688, |
| "learning_rate": 0.0001300116776956014, |
| "loss": 1.039, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.352083940542116, |
| "grad_norm": 0.2126050591468811, |
| "learning_rate": 0.00012993382639159206, |
| "loss": 1.0365, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.3524725541630234, |
| "grad_norm": 0.20314334332942963, |
| "learning_rate": 0.0001298559750875827, |
| "loss": 1.0474, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.35286116778393084, |
| "grad_norm": 0.23720984160900116, |
| "learning_rate": 0.0001297781237835734, |
| "loss": 1.0529, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.35324978140483826, |
| "grad_norm": 0.22642800211906433, |
| "learning_rate": 0.00012970027247956404, |
| "loss": 1.0586, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.3536383950257457, |
| "grad_norm": 0.20469972491264343, |
| "learning_rate": 0.0001296224211755547, |
| "loss": 1.0267, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.35402700864665304, |
| "grad_norm": 0.197368785738945, |
| "learning_rate": 0.00012954456987154534, |
| "loss": 1.0348, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.35441562226756046, |
| "grad_norm": 0.21924498677253723, |
| "learning_rate": 0.000129466718567536, |
| "loss": 1.0861, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.3548042358884679, |
| "grad_norm": 0.22006285190582275, |
| "learning_rate": 0.00012938886726352667, |
| "loss": 1.0545, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.3551928495093753, |
| "grad_norm": 0.22419220209121704, |
| "learning_rate": 0.00012931101595951733, |
| "loss": 1.0716, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.3555814631302827, |
| "grad_norm": 0.215990349650383, |
| "learning_rate": 0.00012923316465550798, |
| "loss": 1.0619, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.35597007675119013, |
| "grad_norm": 0.20783264935016632, |
| "learning_rate": 0.00012915531335149863, |
| "loss": 1.0412, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.35635869037209755, |
| "grad_norm": 0.24584618210792542, |
| "learning_rate": 0.00012907746204748928, |
| "loss": 1.1165, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.35674730399300497, |
| "grad_norm": 0.23146122694015503, |
| "learning_rate": 0.00012899961074347996, |
| "loss": 1.1111, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.3571359176139124, |
| "grad_norm": 0.19983729720115662, |
| "learning_rate": 0.00012892175943947061, |
| "loss": 1.0674, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.35752453123481975, |
| "grad_norm": 0.2161000818014145, |
| "learning_rate": 0.00012884390813546127, |
| "loss": 1.076, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.35791314485572717, |
| "grad_norm": 0.21042793989181519, |
| "learning_rate": 0.00012876605683145192, |
| "loss": 1.0535, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3583017584766346, |
| "grad_norm": 0.20135439932346344, |
| "learning_rate": 0.0001286882055274426, |
| "loss": 1.0059, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.358690372097542, |
| "grad_norm": 0.19394971430301666, |
| "learning_rate": 0.00012861035422343325, |
| "loss": 1.0381, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.35907898571844943, |
| "grad_norm": 0.21171030402183533, |
| "learning_rate": 0.0001285325029194239, |
| "loss": 1.0513, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.35946759933935685, |
| "grad_norm": 0.19476690888404846, |
| "learning_rate": 0.00012845465161541458, |
| "loss": 1.0003, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.35985621296026427, |
| "grad_norm": 0.20468670129776, |
| "learning_rate": 0.00012837680031140523, |
| "loss": 1.0608, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.3602448265811717, |
| "grad_norm": 0.21159446239471436, |
| "learning_rate": 0.00012829894900739588, |
| "loss": 1.0734, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3606334402020791, |
| "grad_norm": 0.21179519593715668, |
| "learning_rate": 0.00012822109770338654, |
| "loss": 1.0957, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.3610220538229865, |
| "grad_norm": 0.20997527241706848, |
| "learning_rate": 0.00012814324639937722, |
| "loss": 1.0644, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.3614106674438939, |
| "grad_norm": 0.21178296208381653, |
| "learning_rate": 0.00012806539509536787, |
| "loss": 1.0208, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3617992810648013, |
| "grad_norm": 0.20890356600284576, |
| "learning_rate": 0.00012798754379135852, |
| "loss": 1.0888, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3621878946857087, |
| "grad_norm": 0.20177409052848816, |
| "learning_rate": 0.00012790969248734917, |
| "loss": 0.9741, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.36257650830661614, |
| "grad_norm": 0.23504556715488434, |
| "learning_rate": 0.00012783184118333982, |
| "loss": 1.1048, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.36296512192752356, |
| "grad_norm": 0.22829356789588928, |
| "learning_rate": 0.0001277539898793305, |
| "loss": 1.0798, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.363353735548431, |
| "grad_norm": 0.2068483531475067, |
| "learning_rate": 0.00012767613857532116, |
| "loss": 1.0452, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3637423491693384, |
| "grad_norm": 0.2093171775341034, |
| "learning_rate": 0.0001275982872713118, |
| "loss": 1.0742, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3641309627902458, |
| "grad_norm": 0.21478736400604248, |
| "learning_rate": 0.00012752043596730246, |
| "loss": 1.0572, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.36451957641115323, |
| "grad_norm": 0.1906953752040863, |
| "learning_rate": 0.0001274425846632931, |
| "loss": 1.0107, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.3649081900320606, |
| "grad_norm": 0.20580604672431946, |
| "learning_rate": 0.0001273647333592838, |
| "loss": 1.0677, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.365296803652968, |
| "grad_norm": 0.22586850821971893, |
| "learning_rate": 0.00012728688205527444, |
| "loss": 1.0389, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.36568541727387543, |
| "grad_norm": 0.199899360537529, |
| "learning_rate": 0.0001272090307512651, |
| "loss": 1.0462, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.36607403089478285, |
| "grad_norm": 0.19881689548492432, |
| "learning_rate": 0.00012713117944725575, |
| "loss": 1.0565, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.3664626445156903, |
| "grad_norm": 0.21748925745487213, |
| "learning_rate": 0.0001270533281432464, |
| "loss": 1.0659, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.3668512581365977, |
| "grad_norm": 0.19363689422607422, |
| "learning_rate": 0.00012697547683923708, |
| "loss": 1.0307, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.3672398717575051, |
| "grad_norm": 0.21701784431934357, |
| "learning_rate": 0.00012689762553522773, |
| "loss": 1.0684, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.36762848537841253, |
| "grad_norm": 0.21406958997249603, |
| "learning_rate": 0.00012681977423121838, |
| "loss": 1.0703, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.36801709899931995, |
| "grad_norm": 0.23539729416370392, |
| "learning_rate": 0.00012674192292720903, |
| "loss": 1.1537, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.36840571262022737, |
| "grad_norm": 0.2177354395389557, |
| "learning_rate": 0.00012666407162319969, |
| "loss": 1.0131, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.36879432624113473, |
| "grad_norm": 0.255346417427063, |
| "learning_rate": 0.00012658622031919037, |
| "loss": 0.9807, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.36918293986204215, |
| "grad_norm": 0.2139921486377716, |
| "learning_rate": 0.00012650836901518102, |
| "loss": 1.0392, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.36957155348294957, |
| "grad_norm": 0.22490833699703217, |
| "learning_rate": 0.00012643051771117167, |
| "loss": 1.0512, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.369960167103857, |
| "grad_norm": 0.20698820054531097, |
| "learning_rate": 0.00012635266640716232, |
| "loss": 1.0391, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.3703487807247644, |
| "grad_norm": 0.2276201844215393, |
| "learning_rate": 0.00012627481510315297, |
| "loss": 1.0513, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.3707373943456718, |
| "grad_norm": 0.2493600994348526, |
| "learning_rate": 0.00012619696379914365, |
| "loss": 1.0136, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.37112600796657924, |
| "grad_norm": 0.2155001014471054, |
| "learning_rate": 0.0001261191124951343, |
| "loss": 1.0523, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.37151462158748666, |
| "grad_norm": 0.21571211516857147, |
| "learning_rate": 0.00012604126119112496, |
| "loss": 1.0288, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.3719032352083941, |
| "grad_norm": 0.23238877952098846, |
| "learning_rate": 0.0001259634098871156, |
| "loss": 1.0638, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.37229184882930144, |
| "grad_norm": 0.2002813220024109, |
| "learning_rate": 0.00012588555858310626, |
| "loss": 0.9665, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.37268046245020886, |
| "grad_norm": 0.21712858974933624, |
| "learning_rate": 0.0001258077072790969, |
| "loss": 1.0469, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.3730690760711163, |
| "grad_norm": 0.2178192287683487, |
| "learning_rate": 0.0001257298559750876, |
| "loss": 1.0267, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3734576896920237, |
| "grad_norm": 0.25488024950027466, |
| "learning_rate": 0.00012565200467107824, |
| "loss": 1.0153, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.3738463033129311, |
| "grad_norm": 0.20070038735866547, |
| "learning_rate": 0.0001255741533670689, |
| "loss": 1.0279, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.37423491693383854, |
| "grad_norm": 0.21885356307029724, |
| "learning_rate": 0.00012549630206305955, |
| "loss": 1.0395, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.37462353055474595, |
| "grad_norm": 0.2407921701669693, |
| "learning_rate": 0.0001254184507590502, |
| "loss": 1.0767, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.3750121441756534, |
| "grad_norm": 0.20645053684711456, |
| "learning_rate": 0.00012534059945504088, |
| "loss": 1.0318, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.3754007577965608, |
| "grad_norm": 0.21275092661380768, |
| "learning_rate": 0.00012526274815103153, |
| "loss": 1.0546, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.3757893714174682, |
| "grad_norm": 0.21574917435646057, |
| "learning_rate": 0.00012518489684702218, |
| "loss": 1.032, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3761779850383756, |
| "grad_norm": 0.21589480340480804, |
| "learning_rate": 0.00012510704554301284, |
| "loss": 1.0834, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.376566598659283, |
| "grad_norm": 0.19576796889305115, |
| "learning_rate": 0.0001250291942390035, |
| "loss": 1.0178, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.3769552122801904, |
| "grad_norm": 0.20941287279129028, |
| "learning_rate": 0.00012495134293499417, |
| "loss": 1.0712, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.37734382590109783, |
| "grad_norm": 0.22585494816303253, |
| "learning_rate": 0.00012487349163098482, |
| "loss": 1.0401, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.37773243952200525, |
| "grad_norm": 0.21093420684337616, |
| "learning_rate": 0.00012479564032697547, |
| "loss": 1.0569, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.37812105314291267, |
| "grad_norm": 0.22375014424324036, |
| "learning_rate": 0.00012471778902296612, |
| "loss": 1.0687, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3785096667638201, |
| "grad_norm": 0.19787487387657166, |
| "learning_rate": 0.0001246399377189568, |
| "loss": 1.0266, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.3788982803847275, |
| "grad_norm": 0.20633013546466827, |
| "learning_rate": 0.00012456208641494745, |
| "loss": 0.9996, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.3792868940056349, |
| "grad_norm": 0.21559873223304749, |
| "learning_rate": 0.0001244842351109381, |
| "loss": 1.0851, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.3796755076265423, |
| "grad_norm": 0.2166333943605423, |
| "learning_rate": 0.00012440638380692879, |
| "loss": 1.0859, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.3800641212474497, |
| "grad_norm": 0.18558773398399353, |
| "learning_rate": 0.00012432853250291944, |
| "loss": 0.9534, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.3804527348683571, |
| "grad_norm": 0.2086942344903946, |
| "learning_rate": 0.0001242506811989101, |
| "loss": 1.0786, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.38084134848926454, |
| "grad_norm": 0.2207823544740677, |
| "learning_rate": 0.00012417282989490074, |
| "loss": 1.0626, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.38122996211017196, |
| "grad_norm": 0.21255749464035034, |
| "learning_rate": 0.00012409497859089142, |
| "loss": 1.063, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.3816185757310794, |
| "grad_norm": 0.20682042837142944, |
| "learning_rate": 0.00012401712728688207, |
| "loss": 1.034, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3820071893519868, |
| "grad_norm": 0.2084134966135025, |
| "learning_rate": 0.00012393927598287272, |
| "loss": 1.0481, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.3823958029728942, |
| "grad_norm": 0.1922312080860138, |
| "learning_rate": 0.00012386142467886338, |
| "loss": 1.0461, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.38278441659380164, |
| "grad_norm": 0.20893707871437073, |
| "learning_rate": 0.00012378357337485406, |
| "loss": 1.0797, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.383173030214709, |
| "grad_norm": 0.19717541337013245, |
| "learning_rate": 0.0001237057220708447, |
| "loss": 1.0028, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.3835616438356164, |
| "grad_norm": 0.20688053965568542, |
| "learning_rate": 0.00012362787076683536, |
| "loss": 0.989, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.38395025745652384, |
| "grad_norm": 0.20580583810806274, |
| "learning_rate": 0.000123550019462826, |
| "loss": 1.06, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.38433887107743125, |
| "grad_norm": 0.2151709794998169, |
| "learning_rate": 0.00012347216815881666, |
| "loss": 1.0685, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3847274846983387, |
| "grad_norm": 0.19573980569839478, |
| "learning_rate": 0.00012339431685480734, |
| "loss": 1.0072, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3851160983192461, |
| "grad_norm": 0.1949119120836258, |
| "learning_rate": 0.000123316465550798, |
| "loss": 0.9995, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.3855047119401535, |
| "grad_norm": 0.2062375247478485, |
| "learning_rate": 0.00012323861424678865, |
| "loss": 1.0694, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.38589332556106093, |
| "grad_norm": 0.2007209211587906, |
| "learning_rate": 0.0001231607629427793, |
| "loss": 1.0397, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.38628193918196835, |
| "grad_norm": 0.2231544405221939, |
| "learning_rate": 0.00012308291163876995, |
| "loss": 1.0755, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.38667055280287577, |
| "grad_norm": 0.2103337049484253, |
| "learning_rate": 0.0001230050603347606, |
| "loss": 1.0505, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.38705916642378313, |
| "grad_norm": 0.20178386569023132, |
| "learning_rate": 0.00012292720903075128, |
| "loss": 1.0696, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.38744778004469055, |
| "grad_norm": 0.21268007159233093, |
| "learning_rate": 0.00012284935772674193, |
| "loss": 1.0262, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.38783639366559797, |
| "grad_norm": 0.21439722180366516, |
| "learning_rate": 0.0001227715064227326, |
| "loss": 1.0718, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.3882250072865054, |
| "grad_norm": 0.19691336154937744, |
| "learning_rate": 0.00012269365511872324, |
| "loss": 0.9663, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.3886136209074128, |
| "grad_norm": 0.2165926694869995, |
| "learning_rate": 0.0001226158038147139, |
| "loss": 1.0432, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3890022345283202, |
| "grad_norm": 0.20730604231357574, |
| "learning_rate": 0.00012253795251070457, |
| "loss": 1.0386, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.38939084814922764, |
| "grad_norm": 0.2138068974018097, |
| "learning_rate": 0.00012246010120669522, |
| "loss": 1.0683, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.38977946177013506, |
| "grad_norm": 0.2118951678276062, |
| "learning_rate": 0.00012238224990268587, |
| "loss": 1.0393, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3901680753910425, |
| "grad_norm": 0.20879961550235748, |
| "learning_rate": 0.00012230439859867653, |
| "loss": 1.0349, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.39055668901194984, |
| "grad_norm": 0.19588464498519897, |
| "learning_rate": 0.00012222654729466718, |
| "loss": 1.0226, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.39094530263285726, |
| "grad_norm": 0.2059485912322998, |
| "learning_rate": 0.00012214869599065786, |
| "loss": 1.052, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.3913339162537647, |
| "grad_norm": 0.2299761176109314, |
| "learning_rate": 0.0001220708446866485, |
| "loss": 1.1055, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.3917225298746721, |
| "grad_norm": 0.20196737349033356, |
| "learning_rate": 0.00012199299338263916, |
| "loss": 1.0497, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.3921111434955795, |
| "grad_norm": 0.20615293085575104, |
| "learning_rate": 0.00012191514207862981, |
| "loss": 1.047, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.39249975711648694, |
| "grad_norm": 0.20265278220176697, |
| "learning_rate": 0.00012183729077462047, |
| "loss": 1.0035, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.39288837073739435, |
| "grad_norm": 0.20197926461696625, |
| "learning_rate": 0.00012175943947061114, |
| "loss": 0.9847, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3932769843583018, |
| "grad_norm": 0.19974152743816376, |
| "learning_rate": 0.0001216815881666018, |
| "loss": 1.0669, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.3936655979792092, |
| "grad_norm": 0.21684005856513977, |
| "learning_rate": 0.00012160373686259245, |
| "loss": 1.0562, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.3940542116001166, |
| "grad_norm": 0.2030404955148697, |
| "learning_rate": 0.00012152588555858311, |
| "loss": 1.0159, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.394442825221024, |
| "grad_norm": 0.2123572677373886, |
| "learning_rate": 0.00012144803425457377, |
| "loss": 1.0757, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.3948314388419314, |
| "grad_norm": 0.20320011675357819, |
| "learning_rate": 0.00012137018295056443, |
| "loss": 1.038, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.3952200524628388, |
| "grad_norm": 0.20120739936828613, |
| "learning_rate": 0.00012129233164655508, |
| "loss": 1.1015, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.39560866608374623, |
| "grad_norm": 0.19862449169158936, |
| "learning_rate": 0.00012121448034254575, |
| "loss": 1.0328, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.39599727970465365, |
| "grad_norm": 0.19761312007904053, |
| "learning_rate": 0.0001211366290385364, |
| "loss": 0.997, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.39638589332556107, |
| "grad_norm": 0.1943569928407669, |
| "learning_rate": 0.00012105877773452705, |
| "loss": 1.0099, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3967745069464685, |
| "grad_norm": 0.2109062373638153, |
| "learning_rate": 0.00012098092643051773, |
| "loss": 1.1039, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.3971631205673759, |
| "grad_norm": 0.20966266095638275, |
| "learning_rate": 0.00012090307512650839, |
| "loss": 1.1208, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.3975517341882833, |
| "grad_norm": 0.19208088517189026, |
| "learning_rate": 0.00012082522382249904, |
| "loss": 1.0147, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.3979403478091907, |
| "grad_norm": 0.21821236610412598, |
| "learning_rate": 0.00012074737251848969, |
| "loss": 1.0615, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.3983289614300981, |
| "grad_norm": 0.20031368732452393, |
| "learning_rate": 0.00012066952121448034, |
| "loss": 1.0303, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.3987175750510055, |
| "grad_norm": 0.22910597920417786, |
| "learning_rate": 0.00012059166991047102, |
| "loss": 1.0182, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.39910618867191294, |
| "grad_norm": 0.20816978812217712, |
| "learning_rate": 0.00012051381860646167, |
| "loss": 1.0142, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.39949480229282036, |
| "grad_norm": 0.20989780128002167, |
| "learning_rate": 0.00012043596730245232, |
| "loss": 1.0676, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.3998834159137278, |
| "grad_norm": 0.21894055604934692, |
| "learning_rate": 0.00012035811599844298, |
| "loss": 1.0222, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.4002720295346352, |
| "grad_norm": 0.2170870155096054, |
| "learning_rate": 0.00012028026469443363, |
| "loss": 1.0319, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.4006606431555426, |
| "grad_norm": 0.20869679749011993, |
| "learning_rate": 0.00012020241339042428, |
| "loss": 1.055, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.40104925677645004, |
| "grad_norm": 0.18850640952587128, |
| "learning_rate": 0.00012012456208641496, |
| "loss": 0.9993, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.40143787039735745, |
| "grad_norm": 0.21462580561637878, |
| "learning_rate": 0.00012004671078240561, |
| "loss": 1.0115, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.4018264840182648, |
| "grad_norm": 0.2008499950170517, |
| "learning_rate": 0.00011996885947839626, |
| "loss": 1.0229, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.40221509763917224, |
| "grad_norm": 0.20063354074954987, |
| "learning_rate": 0.00011989100817438692, |
| "loss": 1.0295, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.40260371126007966, |
| "grad_norm": 0.20655786991119385, |
| "learning_rate": 0.00011981315687037757, |
| "loss": 1.0044, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.4029923248809871, |
| "grad_norm": 0.1985999196767807, |
| "learning_rate": 0.00011973530556636825, |
| "loss": 1.0063, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.4033809385018945, |
| "grad_norm": 0.2039060890674591, |
| "learning_rate": 0.0001196574542623589, |
| "loss": 1.044, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4037695521228019, |
| "grad_norm": 0.21838189661502838, |
| "learning_rate": 0.00011957960295834955, |
| "loss": 1.1101, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.40415816574370933, |
| "grad_norm": 0.21508415043354034, |
| "learning_rate": 0.00011950175165434022, |
| "loss": 1.0764, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.40454677936461675, |
| "grad_norm": 0.2089119255542755, |
| "learning_rate": 0.00011942390035033087, |
| "loss": 0.9986, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.40493539298552417, |
| "grad_norm": 0.19859452545642853, |
| "learning_rate": 0.00011934604904632153, |
| "loss": 1.0122, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.40532400660643153, |
| "grad_norm": 0.2018653154373169, |
| "learning_rate": 0.00011926819774231219, |
| "loss": 1.0187, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.40571262022733895, |
| "grad_norm": 0.19892063736915588, |
| "learning_rate": 0.00011919034643830285, |
| "loss": 1.0029, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.40610123384824637, |
| "grad_norm": 0.20355650782585144, |
| "learning_rate": 0.0001191124951342935, |
| "loss": 1.0484, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.4064898474691538, |
| "grad_norm": 0.2033994495868683, |
| "learning_rate": 0.00011903464383028416, |
| "loss": 1.087, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.4068784610900612, |
| "grad_norm": 0.2047330141067505, |
| "learning_rate": 0.00011895679252627484, |
| "loss": 1.0774, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4072670747109686, |
| "grad_norm": 0.21420112252235413, |
| "learning_rate": 0.00011887894122226549, |
| "loss": 1.0252, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.40765568833187604, |
| "grad_norm": 0.2030097395181656, |
| "learning_rate": 0.00011880108991825614, |
| "loss": 1.0501, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.40804430195278346, |
| "grad_norm": 0.2128026783466339, |
| "learning_rate": 0.00011872323861424679, |
| "loss": 1.1031, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4084329155736909, |
| "grad_norm": 0.20724938809871674, |
| "learning_rate": 0.00011864538731023744, |
| "loss": 1.0327, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.40882152919459824, |
| "grad_norm": 0.20344072580337524, |
| "learning_rate": 0.00011856753600622812, |
| "loss": 1.0719, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.40921014281550566, |
| "grad_norm": 0.2145012468099594, |
| "learning_rate": 0.00011848968470221877, |
| "loss": 1.0582, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.4095987564364131, |
| "grad_norm": 0.220048725605011, |
| "learning_rate": 0.00011841183339820943, |
| "loss": 1.0825, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4099873700573205, |
| "grad_norm": 0.19074465334415436, |
| "learning_rate": 0.00011833398209420008, |
| "loss": 0.9657, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.4103759836782279, |
| "grad_norm": 0.1958267241716385, |
| "learning_rate": 0.00011825613079019073, |
| "loss": 0.9864, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.41076459729913534, |
| "grad_norm": 0.21768233180046082, |
| "learning_rate": 0.00011817827948618141, |
| "loss": 0.9997, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.41115321092004276, |
| "grad_norm": 0.20218704640865326, |
| "learning_rate": 0.00011810042818217206, |
| "loss": 1.072, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.4115418245409502, |
| "grad_norm": 0.2035023719072342, |
| "learning_rate": 0.00011802257687816271, |
| "loss": 1.0415, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4119304381618576, |
| "grad_norm": 0.22603970766067505, |
| "learning_rate": 0.00011794472557415337, |
| "loss": 1.0751, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.412319051782765, |
| "grad_norm": 0.2125842273235321, |
| "learning_rate": 0.00011786687427014402, |
| "loss": 1.0727, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.4127076654036724, |
| "grad_norm": 0.2005981206893921, |
| "learning_rate": 0.0001177890229661347, |
| "loss": 1.0191, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.4130962790245798, |
| "grad_norm": 0.22252701222896576, |
| "learning_rate": 0.00011771117166212535, |
| "loss": 1.0591, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.4134848926454872, |
| "grad_norm": 0.22205251455307007, |
| "learning_rate": 0.000117633320358116, |
| "loss": 1.1198, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.41387350626639463, |
| "grad_norm": 0.20037783682346344, |
| "learning_rate": 0.00011755546905410665, |
| "loss": 1.0548, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.41426211988730205, |
| "grad_norm": 0.21737834811210632, |
| "learning_rate": 0.00011747761775009732, |
| "loss": 1.0922, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.41465073350820947, |
| "grad_norm": 0.19312533736228943, |
| "learning_rate": 0.00011739976644608798, |
| "loss": 0.9836, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4150393471291169, |
| "grad_norm": 0.22055000066757202, |
| "learning_rate": 0.00011732191514207864, |
| "loss": 1.0383, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.4154279607500243, |
| "grad_norm": 0.22623857855796814, |
| "learning_rate": 0.0001172440638380693, |
| "loss": 1.0704, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.4158165743709317, |
| "grad_norm": 0.21481367945671082, |
| "learning_rate": 0.00011716621253405995, |
| "loss": 1.052, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4162051879918391, |
| "grad_norm": 0.21022087335586548, |
| "learning_rate": 0.0001170883612300506, |
| "loss": 1.1021, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.4165938016127465, |
| "grad_norm": 0.2154620885848999, |
| "learning_rate": 0.00011701050992604126, |
| "loss": 1.0128, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.4169824152336539, |
| "grad_norm": 0.20545578002929688, |
| "learning_rate": 0.00011693265862203194, |
| "loss": 1.0058, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.41737102885456134, |
| "grad_norm": 0.21726195514202118, |
| "learning_rate": 0.00011685480731802259, |
| "loss": 1.0753, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.41775964247546876, |
| "grad_norm": 0.2067115604877472, |
| "learning_rate": 0.00011677695601401324, |
| "loss": 1.0594, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4181482560963762, |
| "grad_norm": 0.23024648427963257, |
| "learning_rate": 0.0001166991047100039, |
| "loss": 1.1039, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.4185368697172836, |
| "grad_norm": 0.20692144334316254, |
| "learning_rate": 0.00011662125340599455, |
| "loss": 1.0598, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.418925483338191, |
| "grad_norm": 0.19839999079704285, |
| "learning_rate": 0.00011654340210198522, |
| "loss": 1.054, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.41931409695909844, |
| "grad_norm": 0.19227825105190277, |
| "learning_rate": 0.00011646555079797588, |
| "loss": 0.9453, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.41970271058000586, |
| "grad_norm": 0.2112567275762558, |
| "learning_rate": 0.00011638769949396653, |
| "loss": 1.023, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.4200913242009132, |
| "grad_norm": 0.185299351811409, |
| "learning_rate": 0.00011630984818995718, |
| "loss": 0.9752, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.42047993782182064, |
| "grad_norm": 0.20148858428001404, |
| "learning_rate": 0.00011623199688594783, |
| "loss": 1.0659, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.42086855144272806, |
| "grad_norm": 0.1935974359512329, |
| "learning_rate": 0.00011615414558193851, |
| "loss": 1.0116, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4212571650636355, |
| "grad_norm": 0.20433953404426575, |
| "learning_rate": 0.00011607629427792916, |
| "loss": 1.0671, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.4216457786845429, |
| "grad_norm": 0.20729799568653107, |
| "learning_rate": 0.00011599844297391982, |
| "loss": 1.0341, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4220343923054503, |
| "grad_norm": 0.2126002460718155, |
| "learning_rate": 0.00011592059166991047, |
| "loss": 1.0188, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.42242300592635773, |
| "grad_norm": 0.19453707337379456, |
| "learning_rate": 0.00011584274036590112, |
| "loss": 1.0331, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.42281161954726515, |
| "grad_norm": 0.20909856259822845, |
| "learning_rate": 0.0001157648890618918, |
| "loss": 0.9984, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.42320023316817257, |
| "grad_norm": 0.19596272706985474, |
| "learning_rate": 0.00011568703775788245, |
| "loss": 1.0121, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.42358884678907993, |
| "grad_norm": 0.22045716643333435, |
| "learning_rate": 0.0001156091864538731, |
| "loss": 1.0591, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.42397746040998735, |
| "grad_norm": 0.22624897956848145, |
| "learning_rate": 0.00011553133514986376, |
| "loss": 1.0565, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.42436607403089477, |
| "grad_norm": 0.20263417065143585, |
| "learning_rate": 0.00011545348384585442, |
| "loss": 1.024, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.4247546876518022, |
| "grad_norm": 0.20179417729377747, |
| "learning_rate": 0.00011537563254184509, |
| "loss": 0.9806, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.4251433012727096, |
| "grad_norm": 0.30221593379974365, |
| "learning_rate": 0.00011529778123783574, |
| "loss": 1.0683, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 0.21195146441459656, |
| "learning_rate": 0.0001152199299338264, |
| "loss": 1.1283, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.42592052851452444, |
| "grad_norm": 0.21860192716121674, |
| "learning_rate": 0.00011514207862981706, |
| "loss": 1.0046, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.42630914213543186, |
| "grad_norm": 0.2234150469303131, |
| "learning_rate": 0.00011506422732580771, |
| "loss": 1.0461, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.4266977557563393, |
| "grad_norm": 0.21535125374794006, |
| "learning_rate": 0.00011498637602179837, |
| "loss": 1.0593, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.4270863693772467, |
| "grad_norm": 0.19313789904117584, |
| "learning_rate": 0.00011490852471778904, |
| "loss": 1.0357, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.42747498299815406, |
| "grad_norm": 0.19886989891529083, |
| "learning_rate": 0.00011483067341377969, |
| "loss": 0.9946, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4278635966190615, |
| "grad_norm": 0.21028490364551544, |
| "learning_rate": 0.00011475282210977034, |
| "loss": 1.0765, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.4282522102399689, |
| "grad_norm": 0.2066621333360672, |
| "learning_rate": 0.000114674970805761, |
| "loss": 1.0405, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.4286408238608763, |
| "grad_norm": 0.18400220572948456, |
| "learning_rate": 0.00011459711950175168, |
| "loss": 0.9404, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.42902943748178374, |
| "grad_norm": 0.2058599591255188, |
| "learning_rate": 0.00011451926819774233, |
| "loss": 1.0505, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.42941805110269116, |
| "grad_norm": 0.19696786999702454, |
| "learning_rate": 0.00011444141689373298, |
| "loss": 1.032, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.4298066647235986, |
| "grad_norm": 0.2082854062318802, |
| "learning_rate": 0.00011436356558972363, |
| "loss": 1.0914, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.430195278344506, |
| "grad_norm": 0.20155015587806702, |
| "learning_rate": 0.00011428571428571428, |
| "loss": 1.0541, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.4305838919654134, |
| "grad_norm": 0.23419982194900513, |
| "learning_rate": 0.00011420786298170494, |
| "loss": 1.0684, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.4309725055863208, |
| "grad_norm": 0.23493975400924683, |
| "learning_rate": 0.00011413001167769561, |
| "loss": 1.0509, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.4313611192072282, |
| "grad_norm": 0.2089843600988388, |
| "learning_rate": 0.00011405216037368627, |
| "loss": 1.0479, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.4317497328281356, |
| "grad_norm": 0.21076850593090057, |
| "learning_rate": 0.00011397430906967692, |
| "loss": 1.064, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.43213834644904303, |
| "grad_norm": 0.20307987928390503, |
| "learning_rate": 0.00011389645776566757, |
| "loss": 1.0416, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.43252696006995045, |
| "grad_norm": 0.20955562591552734, |
| "learning_rate": 0.00011381860646165822, |
| "loss": 1.0158, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.43291557369085787, |
| "grad_norm": 0.2074531465768814, |
| "learning_rate": 0.0001137407551576489, |
| "loss": 1.0486, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.4333041873117653, |
| "grad_norm": 0.20907235145568848, |
| "learning_rate": 0.00011366290385363955, |
| "loss": 1.0352, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.4336928009326727, |
| "grad_norm": 0.21726477146148682, |
| "learning_rate": 0.0001135850525496302, |
| "loss": 1.0068, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.4340814145535801, |
| "grad_norm": 0.20231984555721283, |
| "learning_rate": 0.00011350720124562086, |
| "loss": 0.9757, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.4344700281744875, |
| "grad_norm": 0.23485834896564484, |
| "learning_rate": 0.00011342934994161152, |
| "loss": 1.0681, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.4348586417953949, |
| "grad_norm": 0.21286556124687195, |
| "learning_rate": 0.00011335149863760219, |
| "loss": 1.0399, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.4352472554163023, |
| "grad_norm": 0.2097872495651245, |
| "learning_rate": 0.00011327364733359284, |
| "loss": 1.0435, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.43563586903720974, |
| "grad_norm": 0.2224377542734146, |
| "learning_rate": 0.00011319579602958351, |
| "loss": 1.1664, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.43602448265811716, |
| "grad_norm": 0.19213411211967468, |
| "learning_rate": 0.00011311794472557416, |
| "loss": 1.0424, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.4364130962790246, |
| "grad_norm": 0.20974959433078766, |
| "learning_rate": 0.00011304009342156481, |
| "loss": 1.0943, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.436801709899932, |
| "grad_norm": 0.19943708181381226, |
| "learning_rate": 0.00011296224211755549, |
| "loss": 1.0652, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.4371903235208394, |
| "grad_norm": 0.1832750141620636, |
| "learning_rate": 0.00011288439081354614, |
| "loss": 0.9883, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.43757893714174684, |
| "grad_norm": 0.2205052226781845, |
| "learning_rate": 0.0001128065395095368, |
| "loss": 1.0733, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.43796755076265426, |
| "grad_norm": 0.2082854062318802, |
| "learning_rate": 0.00011272868820552745, |
| "loss": 1.0141, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.4383561643835616, |
| "grad_norm": 0.22755026817321777, |
| "learning_rate": 0.0001126508369015181, |
| "loss": 1.0942, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.43874477800446904, |
| "grad_norm": 0.2098863571882248, |
| "learning_rate": 0.00011257298559750878, |
| "loss": 0.9987, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.43913339162537646, |
| "grad_norm": 0.20559263229370117, |
| "learning_rate": 0.00011249513429349943, |
| "loss": 1.0345, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4395220052462839, |
| "grad_norm": 0.21955084800720215, |
| "learning_rate": 0.00011241728298949008, |
| "loss": 1.1068, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.4399106188671913, |
| "grad_norm": 0.21353478729724884, |
| "learning_rate": 0.00011233943168548073, |
| "loss": 1.0094, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.4402992324880987, |
| "grad_norm": 0.19822491705417633, |
| "learning_rate": 0.00011226158038147139, |
| "loss": 0.9758, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.44068784610900613, |
| "grad_norm": 0.20079441368579865, |
| "learning_rate": 0.00011218372907746206, |
| "loss": 1.0202, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.44107645972991355, |
| "grad_norm": 0.2261926829814911, |
| "learning_rate": 0.00011210587777345272, |
| "loss": 0.9877, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.44146507335082097, |
| "grad_norm": 0.2264915257692337, |
| "learning_rate": 0.00011202802646944337, |
| "loss": 0.9887, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.44185368697172833, |
| "grad_norm": 0.21853779256343842, |
| "learning_rate": 0.00011195017516543402, |
| "loss": 1.0535, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.44224230059263575, |
| "grad_norm": 0.21332694590091705, |
| "learning_rate": 0.00011187232386142467, |
| "loss": 1.0824, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.44263091421354317, |
| "grad_norm": 0.21350236237049103, |
| "learning_rate": 0.00011179447255741535, |
| "loss": 1.0758, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.4430195278344506, |
| "grad_norm": 0.21305765211582184, |
| "learning_rate": 0.000111716621253406, |
| "loss": 1.035, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.443408141455358, |
| "grad_norm": 0.20486389100551605, |
| "learning_rate": 0.00011163876994939666, |
| "loss": 1.0413, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.4437967550762654, |
| "grad_norm": 0.19255472719669342, |
| "learning_rate": 0.00011156091864538731, |
| "loss": 0.9583, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.44418536869717284, |
| "grad_norm": 0.19824008643627167, |
| "learning_rate": 0.00011148306734137796, |
| "loss": 1.0331, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.44457398231808026, |
| "grad_norm": 0.20308080315589905, |
| "learning_rate": 0.00011140521603736863, |
| "loss": 1.0399, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.4449625959389877, |
| "grad_norm": 0.2193964123725891, |
| "learning_rate": 0.00011132736473335929, |
| "loss": 1.063, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.4453512095598951, |
| "grad_norm": 0.2151576578617096, |
| "learning_rate": 0.00011124951342934994, |
| "loss": 1.0795, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.44573982318080246, |
| "grad_norm": 0.23056697845458984, |
| "learning_rate": 0.00011117166212534061, |
| "loss": 1.0351, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.4461284368017099, |
| "grad_norm": 0.1973094493150711, |
| "learning_rate": 0.00011109381082133126, |
| "loss": 0.9866, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.4465170504226173, |
| "grad_norm": 0.2119562178850174, |
| "learning_rate": 0.00011101595951732191, |
| "loss": 1.0591, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.4469056640435247, |
| "grad_norm": 0.20407763123512268, |
| "learning_rate": 0.00011093810821331259, |
| "loss": 0.988, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.44729427766443214, |
| "grad_norm": 0.19474107027053833, |
| "learning_rate": 0.00011086025690930324, |
| "loss": 0.9729, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.44768289128533956, |
| "grad_norm": 0.2179928421974182, |
| "learning_rate": 0.0001107824056052939, |
| "loss": 1.0558, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.448071504906247, |
| "grad_norm": 0.44306451082229614, |
| "learning_rate": 0.00011070455430128455, |
| "loss": 1.0901, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.4484601185271544, |
| "grad_norm": 0.22060540318489075, |
| "learning_rate": 0.0001106267029972752, |
| "loss": 1.0009, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.4488487321480618, |
| "grad_norm": 0.20534972846508026, |
| "learning_rate": 0.00011054885169326588, |
| "loss": 0.9741, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.4492373457689692, |
| "grad_norm": 0.19488993287086487, |
| "learning_rate": 0.00011047100038925653, |
| "loss": 1.0, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.4496259593898766, |
| "grad_norm": 0.20462395250797272, |
| "learning_rate": 0.00011039314908524718, |
| "loss": 1.0309, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.450014573010784, |
| "grad_norm": 0.2170749306678772, |
| "learning_rate": 0.00011031529778123784, |
| "loss": 1.0726, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.45040318663169143, |
| "grad_norm": 0.2066730111837387, |
| "learning_rate": 0.00011023744647722849, |
| "loss": 1.0227, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.45079180025259885, |
| "grad_norm": 0.20625676214694977, |
| "learning_rate": 0.00011015959517321917, |
| "loss": 1.0287, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.45118041387350627, |
| "grad_norm": 0.19483047723770142, |
| "learning_rate": 0.00011008174386920982, |
| "loss": 0.9639, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.4515690274944137, |
| "grad_norm": 0.24705417454242706, |
| "learning_rate": 0.00011000389256520047, |
| "loss": 0.9903, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.4519576411153211, |
| "grad_norm": 0.2109205424785614, |
| "learning_rate": 0.00010992604126119112, |
| "loss": 1.054, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.4523462547362285, |
| "grad_norm": 0.20904991030693054, |
| "learning_rate": 0.00010984818995718178, |
| "loss": 1.0416, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.45273486835713594, |
| "grad_norm": 0.19841328263282776, |
| "learning_rate": 0.00010977033865317245, |
| "loss": 0.9986, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.4531234819780433, |
| "grad_norm": 0.20545506477355957, |
| "learning_rate": 0.0001096924873491631, |
| "loss": 1.0337, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.4535120955989507, |
| "grad_norm": 0.208644837141037, |
| "learning_rate": 0.00010961463604515376, |
| "loss": 1.0304, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.45390070921985815, |
| "grad_norm": 0.2111911028623581, |
| "learning_rate": 0.00010953678474114441, |
| "loss": 1.0398, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.45428932284076556, |
| "grad_norm": 0.2600184381008148, |
| "learning_rate": 0.00010945893343713506, |
| "loss": 1.0509, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.454677936461673, |
| "grad_norm": 0.2059030532836914, |
| "learning_rate": 0.00010938108213312574, |
| "loss": 0.9347, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.4550665500825804, |
| "grad_norm": 0.19232551753520966, |
| "learning_rate": 0.0001093032308291164, |
| "loss": 1.0162, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.4554551637034878, |
| "grad_norm": 0.19147330522537231, |
| "learning_rate": 0.00010922537952510705, |
| "loss": 0.9872, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.45584377732439524, |
| "grad_norm": 0.2599676251411438, |
| "learning_rate": 0.00010914752822109771, |
| "loss": 1.0402, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.45623239094530266, |
| "grad_norm": 0.2159397304058075, |
| "learning_rate": 0.00010906967691708836, |
| "loss": 1.0411, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.45662100456621, |
| "grad_norm": 0.23864266276359558, |
| "learning_rate": 0.00010899182561307903, |
| "loss": 1.054, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.45700961818711744, |
| "grad_norm": 0.2027217596769333, |
| "learning_rate": 0.0001089139743090697, |
| "loss": 0.9713, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.45739823180802486, |
| "grad_norm": 0.1837588995695114, |
| "learning_rate": 0.00010883612300506035, |
| "loss": 0.9698, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.4577868454289323, |
| "grad_norm": 0.20038527250289917, |
| "learning_rate": 0.000108758271701051, |
| "loss": 1.0456, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.4581754590498397, |
| "grad_norm": 0.21525044739246368, |
| "learning_rate": 0.00010868042039704165, |
| "loss": 1.021, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.4585640726707471, |
| "grad_norm": 0.18813730776309967, |
| "learning_rate": 0.0001086025690930323, |
| "loss": 0.9673, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.45895268629165453, |
| "grad_norm": 0.2056179642677307, |
| "learning_rate": 0.00010852471778902298, |
| "loss": 1.0119, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.45934129991256195, |
| "grad_norm": 0.21599683165550232, |
| "learning_rate": 0.00010844686648501363, |
| "loss": 1.0537, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.45972991353346937, |
| "grad_norm": 0.19750265777111053, |
| "learning_rate": 0.00010836901518100429, |
| "loss": 1.0203, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.4601185271543768, |
| "grad_norm": 0.22186161577701569, |
| "learning_rate": 0.00010829116387699494, |
| "loss": 1.0583, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.46050714077528415, |
| "grad_norm": 0.2109905481338501, |
| "learning_rate": 0.00010821331257298559, |
| "loss": 1.0022, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.46089575439619157, |
| "grad_norm": 0.2032858431339264, |
| "learning_rate": 0.00010813546126897627, |
| "loss": 0.9774, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.461284368017099, |
| "grad_norm": 0.20381197333335876, |
| "learning_rate": 0.00010805760996496692, |
| "loss": 0.9768, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.4616729816380064, |
| "grad_norm": 0.20488987863063812, |
| "learning_rate": 0.00010797975866095757, |
| "loss": 1.0448, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.4620615952589138, |
| "grad_norm": 0.20257477462291718, |
| "learning_rate": 0.00010790190735694823, |
| "loss": 1.0157, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.46245020887982125, |
| "grad_norm": 0.20761239528656006, |
| "learning_rate": 0.00010782405605293888, |
| "loss": 1.0328, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.46283882250072866, |
| "grad_norm": 0.22062581777572632, |
| "learning_rate": 0.00010774620474892956, |
| "loss": 1.0362, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.4632274361216361, |
| "grad_norm": 0.19970272481441498, |
| "learning_rate": 0.00010766835344492021, |
| "loss": 1.0783, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.4636160497425435, |
| "grad_norm": 0.2221893072128296, |
| "learning_rate": 0.00010759050214091086, |
| "loss": 1.0136, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.46400466336345086, |
| "grad_norm": 0.2124665081501007, |
| "learning_rate": 0.00010751265083690151, |
| "loss": 1.0528, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.4643932769843583, |
| "grad_norm": 0.2001204937696457, |
| "learning_rate": 0.00010743479953289218, |
| "loss": 1.0495, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.4647818906052657, |
| "grad_norm": 0.20979635417461395, |
| "learning_rate": 0.00010735694822888284, |
| "loss": 1.0664, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.4651705042261731, |
| "grad_norm": 0.190982848405838, |
| "learning_rate": 0.0001072790969248735, |
| "loss": 1.0256, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.46555911784708054, |
| "grad_norm": 0.19910745322704315, |
| "learning_rate": 0.00010720124562086415, |
| "loss": 1.0263, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.46594773146798796, |
| "grad_norm": 0.21624085307121277, |
| "learning_rate": 0.00010712339431685481, |
| "loss": 1.0768, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.4663363450888954, |
| "grad_norm": 0.20857703685760498, |
| "learning_rate": 0.00010704554301284547, |
| "loss": 1.0892, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4667249587098028, |
| "grad_norm": 0.21897061169147491, |
| "learning_rate": 0.00010696769170883613, |
| "loss": 1.0873, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.4671135723307102, |
| "grad_norm": 0.1943386346101761, |
| "learning_rate": 0.0001068898404048268, |
| "loss": 1.0116, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.4675021859516176, |
| "grad_norm": 0.22607874870300293, |
| "learning_rate": 0.00010681198910081745, |
| "loss": 1.0328, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.467890799572525, |
| "grad_norm": 0.1898999959230423, |
| "learning_rate": 0.0001067341377968081, |
| "loss": 0.9791, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.4682794131934324, |
| "grad_norm": 0.2193334400653839, |
| "learning_rate": 0.00010665628649279875, |
| "loss": 1.0742, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.46866802681433983, |
| "grad_norm": 0.2096349149942398, |
| "learning_rate": 0.00010657843518878943, |
| "loss": 1.0683, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.46905664043524725, |
| "grad_norm": 0.2040576934814453, |
| "learning_rate": 0.00010650058388478008, |
| "loss": 1.0516, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.46944525405615467, |
| "grad_norm": 0.20619645714759827, |
| "learning_rate": 0.00010642273258077074, |
| "loss": 1.0429, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.4698338676770621, |
| "grad_norm": 0.19753660261631012, |
| "learning_rate": 0.00010634488127676139, |
| "loss": 1.0268, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.4702224812979695, |
| "grad_norm": 0.2201426476240158, |
| "learning_rate": 0.00010626702997275204, |
| "loss": 1.0879, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4706110949188769, |
| "grad_norm": 0.21307805180549622, |
| "learning_rate": 0.00010618917866874272, |
| "loss": 1.0186, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.47099970853978435, |
| "grad_norm": 0.21142373979091644, |
| "learning_rate": 0.00010611132736473337, |
| "loss": 1.0417, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.4713883221606917, |
| "grad_norm": 0.20523706078529358, |
| "learning_rate": 0.00010603347606072402, |
| "loss": 1.0372, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.4717769357815991, |
| "grad_norm": 0.19843094050884247, |
| "learning_rate": 0.00010595562475671468, |
| "loss": 1.0062, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.47216554940250655, |
| "grad_norm": 0.2146739959716797, |
| "learning_rate": 0.00010587777345270533, |
| "loss": 1.0528, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.47255416302341396, |
| "grad_norm": 0.2136303037405014, |
| "learning_rate": 0.00010579992214869601, |
| "loss": 1.0521, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.4729427766443214, |
| "grad_norm": 0.21379397809505463, |
| "learning_rate": 0.00010572207084468666, |
| "loss": 1.0362, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.4733313902652288, |
| "grad_norm": 0.20459088683128357, |
| "learning_rate": 0.00010564421954067731, |
| "loss": 1.0455, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.4737200038861362, |
| "grad_norm": 0.20667988061904907, |
| "learning_rate": 0.00010556636823666796, |
| "loss": 1.0284, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.47410861750704364, |
| "grad_norm": 0.21820449829101562, |
| "learning_rate": 0.00010548851693265862, |
| "loss": 1.0584, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.47449723112795106, |
| "grad_norm": 0.19705156981945038, |
| "learning_rate": 0.00010541066562864928, |
| "loss": 1.004, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.4748858447488584, |
| "grad_norm": 0.19806528091430664, |
| "learning_rate": 0.00010533281432463995, |
| "loss": 1.0519, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.47527445836976584, |
| "grad_norm": 0.2006833702325821, |
| "learning_rate": 0.0001052549630206306, |
| "loss": 1.0119, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.47566307199067326, |
| "grad_norm": 0.21757058799266815, |
| "learning_rate": 0.00010517711171662125, |
| "loss": 1.0961, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.4760516856115807, |
| "grad_norm": 0.2015775889158249, |
| "learning_rate": 0.00010509926041261192, |
| "loss": 1.0419, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.4764402992324881, |
| "grad_norm": 0.19691923260688782, |
| "learning_rate": 0.00010502140910860257, |
| "loss": 1.0555, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.4768289128533955, |
| "grad_norm": 0.19924800097942352, |
| "learning_rate": 0.00010494355780459323, |
| "loss": 1.0106, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.47721752647430293, |
| "grad_norm": 0.21416346728801727, |
| "learning_rate": 0.0001048657065005839, |
| "loss": 1.0741, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.47760614009521035, |
| "grad_norm": 0.21823547780513763, |
| "learning_rate": 0.00010478785519657455, |
| "loss": 1.023, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.47799475371611777, |
| "grad_norm": 0.2083735466003418, |
| "learning_rate": 0.0001047100038925652, |
| "loss": 1.0424, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.4783833673370252, |
| "grad_norm": 0.2219141572713852, |
| "learning_rate": 0.00010463215258855586, |
| "loss": 1.0839, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.47877198095793255, |
| "grad_norm": 0.21334600448608398, |
| "learning_rate": 0.00010455430128454653, |
| "loss": 0.9888, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.47916059457883997, |
| "grad_norm": 0.2140086442232132, |
| "learning_rate": 0.00010447644998053719, |
| "loss": 1.0119, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.4795492081997474, |
| "grad_norm": 0.25360551476478577, |
| "learning_rate": 0.00010439859867652784, |
| "loss": 1.0026, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4799378218206548, |
| "grad_norm": 0.20200380682945251, |
| "learning_rate": 0.00010432074737251849, |
| "loss": 1.0, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4803264354415622, |
| "grad_norm": 0.22641289234161377, |
| "learning_rate": 0.00010424289606850914, |
| "loss": 1.1022, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.48071504906246965, |
| "grad_norm": 0.20538561046123505, |
| "learning_rate": 0.00010416504476449982, |
| "loss": 0.9847, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.48110366268337706, |
| "grad_norm": 0.206883504986763, |
| "learning_rate": 0.00010408719346049047, |
| "loss": 1.0152, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4814922763042845, |
| "grad_norm": 0.21584320068359375, |
| "learning_rate": 0.00010400934215648113, |
| "loss": 1.0361, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.4818808899251919, |
| "grad_norm": 0.20963703095912933, |
| "learning_rate": 0.00010393149085247178, |
| "loss": 1.0814, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.48226950354609927, |
| "grad_norm": 0.1965872198343277, |
| "learning_rate": 0.00010385363954846243, |
| "loss": 1.0365, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.4826581171670067, |
| "grad_norm": 0.2030191719532013, |
| "learning_rate": 0.00010377578824445311, |
| "loss": 1.0374, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.4830467307879141, |
| "grad_norm": 0.21448804438114166, |
| "learning_rate": 0.00010369793694044376, |
| "loss": 0.9686, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.4834353444088215, |
| "grad_norm": 0.2181752622127533, |
| "learning_rate": 0.00010362008563643441, |
| "loss": 1.0812, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.48382395802972894, |
| "grad_norm": 0.19887101650238037, |
| "learning_rate": 0.00010354223433242507, |
| "loss": 1.036, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.48421257165063636, |
| "grad_norm": 0.19007287919521332, |
| "learning_rate": 0.00010346438302841572, |
| "loss": 1.0292, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.4846011852715438, |
| "grad_norm": 0.21390347182750702, |
| "learning_rate": 0.0001033865317244064, |
| "loss": 1.0284, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.4849897988924512, |
| "grad_norm": 0.23822663724422455, |
| "learning_rate": 0.00010330868042039705, |
| "loss": 1.1044, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4853784125133586, |
| "grad_norm": 0.20779070258140564, |
| "learning_rate": 0.0001032308291163877, |
| "loss": 1.0475, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.48576702613426603, |
| "grad_norm": 0.19232134521007538, |
| "learning_rate": 0.00010315297781237835, |
| "loss": 0.9945, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4861556397551734, |
| "grad_norm": 0.22378556430339813, |
| "learning_rate": 0.00010307512650836902, |
| "loss": 1.0462, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.4865442533760808, |
| "grad_norm": 0.22156798839569092, |
| "learning_rate": 0.00010299727520435968, |
| "loss": 1.051, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.48693286699698823, |
| "grad_norm": 0.19885733723640442, |
| "learning_rate": 0.00010291942390035034, |
| "loss": 1.0593, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.48732148061789565, |
| "grad_norm": 0.2172418236732483, |
| "learning_rate": 0.000102841572596341, |
| "loss": 1.0513, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.48771009423880307, |
| "grad_norm": 0.22136956453323364, |
| "learning_rate": 0.00010276372129233165, |
| "loss": 1.0438, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.4880987078597105, |
| "grad_norm": 0.21337302029132843, |
| "learning_rate": 0.0001026858699883223, |
| "loss": 1.0551, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.4884873214806179, |
| "grad_norm": 0.21376267075538635, |
| "learning_rate": 0.00010260801868431296, |
| "loss": 1.054, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.4888759351015253, |
| "grad_norm": 0.19498860836029053, |
| "learning_rate": 0.00010253016738030364, |
| "loss": 1.0045, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.48926454872243275, |
| "grad_norm": 0.22354961931705475, |
| "learning_rate": 0.00010245231607629429, |
| "loss": 1.096, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.4896531623433401, |
| "grad_norm": 0.2078939527273178, |
| "learning_rate": 0.00010237446477228494, |
| "loss": 1.0102, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.49004177596424753, |
| "grad_norm": 0.20992495119571686, |
| "learning_rate": 0.00010229661346827559, |
| "loss": 0.9814, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.49043038958515495, |
| "grad_norm": 0.2178875207901001, |
| "learning_rate": 0.00010221876216426625, |
| "loss": 1.0489, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.49081900320606237, |
| "grad_norm": 0.22152946889400482, |
| "learning_rate": 0.00010214091086025692, |
| "loss": 1.0808, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.4912076168269698, |
| "grad_norm": 0.21179009974002838, |
| "learning_rate": 0.00010206305955624758, |
| "loss": 1.0323, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4915962304478772, |
| "grad_norm": 0.2126997411251068, |
| "learning_rate": 0.00010198520825223823, |
| "loss": 1.0093, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.4919848440687846, |
| "grad_norm": 0.20912809669971466, |
| "learning_rate": 0.00010190735694822888, |
| "loss": 1.0343, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.49237345768969204, |
| "grad_norm": 0.2231636494398117, |
| "learning_rate": 0.00010182950564421953, |
| "loss": 1.0587, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.49276207131059946, |
| "grad_norm": 0.1954583376646042, |
| "learning_rate": 0.00010175165434021021, |
| "loss": 0.9566, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.4931506849315068, |
| "grad_norm": 0.20520909130573273, |
| "learning_rate": 0.00010167380303620086, |
| "loss": 1.024, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.49353929855241424, |
| "grad_norm": 0.21736180782318115, |
| "learning_rate": 0.00010159595173219152, |
| "loss": 1.0434, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.49392791217332166, |
| "grad_norm": 0.2360561490058899, |
| "learning_rate": 0.00010151810042818217, |
| "loss": 1.114, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.4943165257942291, |
| "grad_norm": 0.20595967769622803, |
| "learning_rate": 0.00010144024912417282, |
| "loss": 0.9909, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.4947051394151365, |
| "grad_norm": 0.2161860466003418, |
| "learning_rate": 0.0001013623978201635, |
| "loss": 1.0536, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.4950937530360439, |
| "grad_norm": 0.19852355122566223, |
| "learning_rate": 0.00010128454651615415, |
| "loss": 1.0001, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.49548236665695133, |
| "grad_norm": 0.21081402897834778, |
| "learning_rate": 0.0001012066952121448, |
| "loss": 1.0151, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.49587098027785875, |
| "grad_norm": 0.2053362876176834, |
| "learning_rate": 0.00010112884390813547, |
| "loss": 1.018, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.49625959389876617, |
| "grad_norm": 0.21205593645572662, |
| "learning_rate": 0.00010105099260412612, |
| "loss": 0.9912, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.4966482075196736, |
| "grad_norm": 0.2005016952753067, |
| "learning_rate": 0.00010097314130011679, |
| "loss": 1.0069, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.49703682114058095, |
| "grad_norm": 0.21688181161880493, |
| "learning_rate": 0.00010089528999610744, |
| "loss": 1.0364, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.49742543476148837, |
| "grad_norm": 0.20582237839698792, |
| "learning_rate": 0.0001008174386920981, |
| "loss": 1.0138, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4978140483823958, |
| "grad_norm": 0.20824448764324188, |
| "learning_rate": 0.00010073958738808876, |
| "loss": 0.9941, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.4982026620033032, |
| "grad_norm": 0.20749075710773468, |
| "learning_rate": 0.00010066173608407941, |
| "loss": 1.0478, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.49859127562421063, |
| "grad_norm": 0.20012183487415314, |
| "learning_rate": 0.00010058388478007009, |
| "loss": 0.995, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.49897988924511805, |
| "grad_norm": 0.20275959372520447, |
| "learning_rate": 0.00010050603347606074, |
| "loss": 1.097, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.49936850286602547, |
| "grad_norm": 0.19588243961334229, |
| "learning_rate": 0.00010042818217205139, |
| "loss": 1.0, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.4997571164869329, |
| "grad_norm": 0.20693185925483704, |
| "learning_rate": 0.00010035033086804204, |
| "loss": 1.0527, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.5001457301078402, |
| "grad_norm": 0.20330573618412018, |
| "learning_rate": 0.0001002724795640327, |
| "loss": 1.0137, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.5005343437287477, |
| "grad_norm": 0.19123876094818115, |
| "learning_rate": 0.00010019462826002337, |
| "loss": 0.9688, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.5009229573496551, |
| "grad_norm": 0.2184276431798935, |
| "learning_rate": 0.00010011677695601403, |
| "loss": 1.0367, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.5013115709705626, |
| "grad_norm": 0.21642108261585236, |
| "learning_rate": 0.00010003892565200468, |
| "loss": 1.102, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5017001845914699, |
| "grad_norm": 0.20351074635982513, |
| "learning_rate": 9.996107434799533e-05, |
| "loss": 1.0327, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.5020887982123774, |
| "grad_norm": 0.22771553695201874, |
| "learning_rate": 9.9883223043986e-05, |
| "loss": 1.104, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.5024774118332848, |
| "grad_norm": 0.2271403968334198, |
| "learning_rate": 9.980537173997665e-05, |
| "loss": 1.1313, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.5028660254541921, |
| "grad_norm": 0.2157830148935318, |
| "learning_rate": 9.97275204359673e-05, |
| "loss": 1.0203, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.5032546390750996, |
| "grad_norm": 0.19555307924747467, |
| "learning_rate": 9.964966913195797e-05, |
| "loss": 1.0194, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.503643252696007, |
| "grad_norm": 0.1898549199104309, |
| "learning_rate": 9.957181782794862e-05, |
| "loss": 1.0034, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.5040318663169144, |
| "grad_norm": 0.23555906116962433, |
| "learning_rate": 9.949396652393928e-05, |
| "loss": 1.0298, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.5044204799378218, |
| "grad_norm": 0.20434850454330444, |
| "learning_rate": 9.941611521992994e-05, |
| "loss": 0.9999, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.5048090935587293, |
| "grad_norm": 0.21015289425849915, |
| "learning_rate": 9.933826391592059e-05, |
| "loss": 1.006, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.5051977071796366, |
| "grad_norm": 0.21147851645946503, |
| "learning_rate": 9.926041261191125e-05, |
| "loss": 1.0854, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5055863208005441, |
| "grad_norm": 0.19666944444179535, |
| "learning_rate": 9.91825613079019e-05, |
| "loss": 1.0057, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.5059749344214515, |
| "grad_norm": 0.21233728528022766, |
| "learning_rate": 9.910471000389257e-05, |
| "loss": 1.0675, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.5063635480423588, |
| "grad_norm": 0.21905581653118134, |
| "learning_rate": 9.902685869988322e-05, |
| "loss": 1.0054, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.5067521616632663, |
| "grad_norm": 0.23434993624687195, |
| "learning_rate": 9.894900739587389e-05, |
| "loss": 0.9915, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.5071407752841737, |
| "grad_norm": 0.21684227883815765, |
| "learning_rate": 9.887115609186454e-05, |
| "loss": 1.1131, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5075293889050811, |
| "grad_norm": 0.21699552237987518, |
| "learning_rate": 9.87933047878552e-05, |
| "loss": 1.0782, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.5079180025259885, |
| "grad_norm": 0.2218221127986908, |
| "learning_rate": 9.871545348384586e-05, |
| "loss": 1.0388, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.508306616146896, |
| "grad_norm": 0.20104359090328217, |
| "learning_rate": 9.863760217983652e-05, |
| "loss": 1.0336, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.5086952297678033, |
| "grad_norm": 0.21907050907611847, |
| "learning_rate": 9.855975087582718e-05, |
| "loss": 1.0587, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.5090838433887108, |
| "grad_norm": 0.2140391767024994, |
| "learning_rate": 9.848189957181784e-05, |
| "loss": 1.0351, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5094724570096182, |
| "grad_norm": 0.33287563920021057, |
| "learning_rate": 9.84040482678085e-05, |
| "loss": 0.9908, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.5098610706305255, |
| "grad_norm": 0.2706705927848816, |
| "learning_rate": 9.832619696379915e-05, |
| "loss": 1.0078, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.510249684251433, |
| "grad_norm": 0.20216278731822968, |
| "learning_rate": 9.824834565978981e-05, |
| "loss": 1.0253, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 0.20736576616764069, |
| "learning_rate": 9.817049435578046e-05, |
| "loss": 1.0217, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.5110269114932479, |
| "grad_norm": 0.2275344580411911, |
| "learning_rate": 9.809264305177113e-05, |
| "loss": 1.0139, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5114155251141552, |
| "grad_norm": 0.22243620455265045, |
| "learning_rate": 9.801479174776178e-05, |
| "loss": 1.0427, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.5118041387350627, |
| "grad_norm": 0.198841854929924, |
| "learning_rate": 9.793694044375243e-05, |
| "loss": 1.0231, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.5121927523559701, |
| "grad_norm": 0.2031068503856659, |
| "learning_rate": 9.78590891397431e-05, |
| "loss": 1.0184, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.5125813659768775, |
| "grad_norm": 0.21712587773799896, |
| "learning_rate": 9.778123783573375e-05, |
| "loss": 1.0205, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.5129699795977849, |
| "grad_norm": 0.19366060197353363, |
| "learning_rate": 9.77033865317244e-05, |
| "loss": 0.9623, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5133585932186923, |
| "grad_norm": 0.19845952093601227, |
| "learning_rate": 9.762553522771507e-05, |
| "loss": 1.0209, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.5137472068395997, |
| "grad_norm": 0.19700276851654053, |
| "learning_rate": 9.754768392370572e-05, |
| "loss": 0.9506, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.5141358204605071, |
| "grad_norm": 0.19797460734844208, |
| "learning_rate": 9.746983261969639e-05, |
| "loss": 1.0928, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.5145244340814146, |
| "grad_norm": 0.20470699667930603, |
| "learning_rate": 9.739198131568704e-05, |
| "loss": 1.0835, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.5149130477023219, |
| "grad_norm": 0.19121742248535156, |
| "learning_rate": 9.731413001167769e-05, |
| "loss": 0.9877, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.5153016613232294, |
| "grad_norm": 0.20026616752147675, |
| "learning_rate": 9.723627870766836e-05, |
| "loss": 1.0094, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.5156902749441368, |
| "grad_norm": 0.2214539796113968, |
| "learning_rate": 9.715842740365901e-05, |
| "loss": 0.9867, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.5160788885650442, |
| "grad_norm": 0.22674603760242462, |
| "learning_rate": 9.708057609964967e-05, |
| "loss": 1.0738, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.5164675021859516, |
| "grad_norm": 0.21274834871292114, |
| "learning_rate": 9.700272479564033e-05, |
| "loss": 1.0458, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.5168561158068591, |
| "grad_norm": 0.20305052399635315, |
| "learning_rate": 9.692487349163099e-05, |
| "loss": 1.0041, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5172447294277664, |
| "grad_norm": 0.1840772181749344, |
| "learning_rate": 9.684702218762166e-05, |
| "loss": 0.9498, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.5176333430486738, |
| "grad_norm": 0.2055782824754715, |
| "learning_rate": 9.676917088361231e-05, |
| "loss": 1.0223, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.5180219566695813, |
| "grad_norm": 0.21826402842998505, |
| "learning_rate": 9.669131957960297e-05, |
| "loss": 1.1068, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.5184105702904886, |
| "grad_norm": 0.22516922652721405, |
| "learning_rate": 9.661346827559363e-05, |
| "loss": 1.0957, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.5187991839113961, |
| "grad_norm": 0.21044284105300903, |
| "learning_rate": 9.653561697158428e-05, |
| "loss": 1.0384, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.5191877975323035, |
| "grad_norm": 0.20275571942329407, |
| "learning_rate": 9.645776566757494e-05, |
| "loss": 0.9978, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.519576411153211, |
| "grad_norm": 0.2077122926712036, |
| "learning_rate": 9.63799143635656e-05, |
| "loss": 1.0418, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.5199650247741183, |
| "grad_norm": 0.19158867001533508, |
| "learning_rate": 9.630206305955625e-05, |
| "loss": 1.0527, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.5203536383950258, |
| "grad_norm": 0.1932496577501297, |
| "learning_rate": 9.622421175554691e-05, |
| "loss": 1.0039, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.5207422520159332, |
| "grad_norm": 0.21937766671180725, |
| "learning_rate": 9.614636045153757e-05, |
| "loss": 1.0373, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5211308656368405, |
| "grad_norm": 0.2268432229757309, |
| "learning_rate": 9.606850914752823e-05, |
| "loss": 1.0815, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.521519479257748, |
| "grad_norm": 0.2147454470396042, |
| "learning_rate": 9.599065784351888e-05, |
| "loss": 1.0331, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.5219080928786554, |
| "grad_norm": 0.19899709522724152, |
| "learning_rate": 9.591280653950954e-05, |
| "loss": 1.032, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.5222967064995628, |
| "grad_norm": 0.19646069407463074, |
| "learning_rate": 9.58349552355002e-05, |
| "loss": 0.9788, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.5226853201204702, |
| "grad_norm": 0.2146075963973999, |
| "learning_rate": 9.575710393149085e-05, |
| "loss": 1.0201, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.5230739337413777, |
| "grad_norm": 0.1968650370836258, |
| "learning_rate": 9.567925262748152e-05, |
| "loss": 0.9894, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.523462547362285, |
| "grad_norm": 0.21111296117305756, |
| "learning_rate": 9.560140132347217e-05, |
| "loss": 1.0961, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.5238511609831925, |
| "grad_norm": 0.20917272567749023, |
| "learning_rate": 9.552355001946282e-05, |
| "loss": 1.0435, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.5242397746040999, |
| "grad_norm": 0.2029752880334854, |
| "learning_rate": 9.544569871545349e-05, |
| "loss": 1.0328, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.5246283882250072, |
| "grad_norm": 0.20726613700389862, |
| "learning_rate": 9.536784741144414e-05, |
| "loss": 1.0465, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5250170018459147, |
| "grad_norm": 0.19778740406036377, |
| "learning_rate": 9.52899961074348e-05, |
| "loss": 1.0058, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.5254056154668221, |
| "grad_norm": 0.19958540797233582, |
| "learning_rate": 9.521214480342546e-05, |
| "loss": 1.0164, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.5257942290877295, |
| "grad_norm": 0.2151395082473755, |
| "learning_rate": 9.513429349941611e-05, |
| "loss": 1.0703, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.5261828427086369, |
| "grad_norm": 0.2366979569196701, |
| "learning_rate": 9.505644219540678e-05, |
| "loss": 0.9832, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.5265714563295444, |
| "grad_norm": 0.22064165771007538, |
| "learning_rate": 9.497859089139743e-05, |
| "loss": 1.0181, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.5269600699504517, |
| "grad_norm": 0.20221936702728271, |
| "learning_rate": 9.49007395873881e-05, |
| "loss": 1.0424, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.5273486835713592, |
| "grad_norm": 0.19608759880065918, |
| "learning_rate": 9.482288828337876e-05, |
| "loss": 1.0074, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.5277372971922666, |
| "grad_norm": 0.20686689019203186, |
| "learning_rate": 9.474503697936941e-05, |
| "loss": 1.0213, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.528125910813174, |
| "grad_norm": 0.223610520362854, |
| "learning_rate": 9.466718567536008e-05, |
| "loss": 1.05, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.5285145244340814, |
| "grad_norm": 0.2135966569185257, |
| "learning_rate": 9.458933437135073e-05, |
| "loss": 1.034, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5289031380549888, |
| "grad_norm": 0.1933239996433258, |
| "learning_rate": 9.451148306734138e-05, |
| "loss": 0.9883, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.5292917516758963, |
| "grad_norm": 0.20794694125652313, |
| "learning_rate": 9.443363176333205e-05, |
| "loss": 1.0103, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.5296803652968036, |
| "grad_norm": 0.20128493010997772, |
| "learning_rate": 9.43557804593227e-05, |
| "loss": 1.015, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.5300689789177111, |
| "grad_norm": 0.2128933072090149, |
| "learning_rate": 9.427792915531336e-05, |
| "loss": 1.0038, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.5304575925386185, |
| "grad_norm": 0.2046983689069748, |
| "learning_rate": 9.420007785130402e-05, |
| "loss": 0.9948, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.5308462061595259, |
| "grad_norm": 0.20909680426120758, |
| "learning_rate": 9.412222654729467e-05, |
| "loss": 1.0308, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.5312348197804333, |
| "grad_norm": 0.2182164192199707, |
| "learning_rate": 9.404437524328533e-05, |
| "loss": 1.0018, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.5316234334013407, |
| "grad_norm": 0.2107028216123581, |
| "learning_rate": 9.396652393927599e-05, |
| "loss": 1.0419, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.5320120470222481, |
| "grad_norm": 0.24631445109844208, |
| "learning_rate": 9.388867263526665e-05, |
| "loss": 1.0171, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.5324006606431555, |
| "grad_norm": 0.20331013202667236, |
| "learning_rate": 9.38108213312573e-05, |
| "loss": 1.0592, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.532789274264063, |
| "grad_norm": 0.19266058504581451, |
| "learning_rate": 9.373297002724796e-05, |
| "loss": 0.9912, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.5331778878849703, |
| "grad_norm": 0.22874227166175842, |
| "learning_rate": 9.365511872323862e-05, |
| "loss": 1.0533, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.5335665015058778, |
| "grad_norm": 0.2088235765695572, |
| "learning_rate": 9.357726741922927e-05, |
| "loss": 1.0464, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.5339551151267852, |
| "grad_norm": 0.2112397700548172, |
| "learning_rate": 9.349941611521994e-05, |
| "loss": 1.0503, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.5343437287476926, |
| "grad_norm": 0.20712170004844666, |
| "learning_rate": 9.342156481121059e-05, |
| "loss": 1.0237, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.5347323423686, |
| "grad_norm": 0.20077116787433624, |
| "learning_rate": 9.334371350720124e-05, |
| "loss": 1.0467, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.5351209559895075, |
| "grad_norm": 0.20394501090049744, |
| "learning_rate": 9.326586220319191e-05, |
| "loss": 1.0054, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.5355095696104148, |
| "grad_norm": 0.19459395110607147, |
| "learning_rate": 9.318801089918256e-05, |
| "loss": 0.9792, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.5358981832313222, |
| "grad_norm": 0.2116049826145172, |
| "learning_rate": 9.311015959517321e-05, |
| "loss": 1.0345, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.5362867968522297, |
| "grad_norm": 0.21672269701957703, |
| "learning_rate": 9.303230829116388e-05, |
| "loss": 1.0709, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.536675410473137, |
| "grad_norm": 0.20358407497406006, |
| "learning_rate": 9.295445698715453e-05, |
| "loss": 1.0534, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.5370640240940445, |
| "grad_norm": 0.19512853026390076, |
| "learning_rate": 9.28766056831452e-05, |
| "loss": 0.9397, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.5374526377149519, |
| "grad_norm": 0.2140122503042221, |
| "learning_rate": 9.279875437913586e-05, |
| "loss": 1.0164, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.5378412513358594, |
| "grad_norm": 0.20486049354076385, |
| "learning_rate": 9.272090307512651e-05, |
| "loss": 0.9892, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.5382298649567667, |
| "grad_norm": 0.20023222267627716, |
| "learning_rate": 9.264305177111718e-05, |
| "loss": 1.0019, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.5386184785776742, |
| "grad_norm": 0.20024439692497253, |
| "learning_rate": 9.256520046710783e-05, |
| "loss": 0.9717, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.5390070921985816, |
| "grad_norm": 0.21021386981010437, |
| "learning_rate": 9.24873491630985e-05, |
| "loss": 1.028, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.5393957058194889, |
| "grad_norm": 0.18508704006671906, |
| "learning_rate": 9.240949785908915e-05, |
| "loss": 1.0008, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.5397843194403964, |
| "grad_norm": 0.19351208209991455, |
| "learning_rate": 9.23316465550798e-05, |
| "loss": 0.9898, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.5401729330613038, |
| "grad_norm": 0.20341919362545013, |
| "learning_rate": 9.225379525107047e-05, |
| "loss": 1.0203, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5405615466822112, |
| "grad_norm": 0.1942797303199768, |
| "learning_rate": 9.217594394706112e-05, |
| "loss": 1.003, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.5409501603031186, |
| "grad_norm": 0.2056138813495636, |
| "learning_rate": 9.209809264305178e-05, |
| "loss": 1.0149, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.5413387739240261, |
| "grad_norm": 0.21572062373161316, |
| "learning_rate": 9.202024133904244e-05, |
| "loss": 0.9808, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.5417273875449334, |
| "grad_norm": 0.19841499626636505, |
| "learning_rate": 9.194239003503309e-05, |
| "loss": 1.0467, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.5421160011658409, |
| "grad_norm": 0.20452147722244263, |
| "learning_rate": 9.186453873102375e-05, |
| "loss": 1.0378, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.5425046147867483, |
| "grad_norm": 0.2090451419353485, |
| "learning_rate": 9.17866874270144e-05, |
| "loss": 1.0823, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.5428932284076556, |
| "grad_norm": 0.215814009308815, |
| "learning_rate": 9.170883612300506e-05, |
| "loss": 1.0994, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.5432818420285631, |
| "grad_norm": 0.19924724102020264, |
| "learning_rate": 9.163098481899572e-05, |
| "loss": 1.0099, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.5436704556494705, |
| "grad_norm": 0.20074865221977234, |
| "learning_rate": 9.155313351498638e-05, |
| "loss": 1.0163, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.544059069270378, |
| "grad_norm": 0.21737203001976013, |
| "learning_rate": 9.147528221097704e-05, |
| "loss": 1.0527, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5444476828912853, |
| "grad_norm": 0.2036885768175125, |
| "learning_rate": 9.139743090696769e-05, |
| "loss": 1.0208, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.5448362965121928, |
| "grad_norm": 0.20861585438251495, |
| "learning_rate": 9.131957960295835e-05, |
| "loss": 1.0175, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.5452249101331001, |
| "grad_norm": 0.23425570130348206, |
| "learning_rate": 9.124172829894901e-05, |
| "loss": 1.053, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.5456135237540076, |
| "grad_norm": 0.20389291644096375, |
| "learning_rate": 9.116387699493966e-05, |
| "loss": 1.0479, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.546002137374915, |
| "grad_norm": 0.20166678726673126, |
| "learning_rate": 9.108602569093033e-05, |
| "loss": 1.0064, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.5463907509958223, |
| "grad_norm": 0.21419203281402588, |
| "learning_rate": 9.100817438692098e-05, |
| "loss": 1.0122, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.5467793646167298, |
| "grad_norm": 0.20541758835315704, |
| "learning_rate": 9.093032308291165e-05, |
| "loss": 1.0355, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.5471679782376372, |
| "grad_norm": 0.21865367889404297, |
| "learning_rate": 9.08524717789023e-05, |
| "loss": 1.0201, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.5475565918585447, |
| "grad_norm": 0.21181468665599823, |
| "learning_rate": 9.077462047489296e-05, |
| "loss": 1.0501, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.547945205479452, |
| "grad_norm": 0.21016767621040344, |
| "learning_rate": 9.069676917088362e-05, |
| "loss": 1.0452, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5483338191003595, |
| "grad_norm": 0.21119755506515503, |
| "learning_rate": 9.061891786687428e-05, |
| "loss": 1.0935, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.5487224327212669, |
| "grad_norm": 0.20688095688819885, |
| "learning_rate": 9.054106656286493e-05, |
| "loss": 1.0526, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.5491110463421743, |
| "grad_norm": 0.21857528388500214, |
| "learning_rate": 9.04632152588556e-05, |
| "loss": 1.0067, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.5494996599630817, |
| "grad_norm": 0.2196548581123352, |
| "learning_rate": 9.038536395484625e-05, |
| "loss": 1.0263, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.5498882735839892, |
| "grad_norm": 0.21952040493488312, |
| "learning_rate": 9.03075126508369e-05, |
| "loss": 1.0009, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5502768872048965, |
| "grad_norm": 0.20059294998645782, |
| "learning_rate": 9.022966134682757e-05, |
| "loss": 1.0481, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.5506655008258039, |
| "grad_norm": 0.1960824728012085, |
| "learning_rate": 9.015181004281822e-05, |
| "loss": 1.0003, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.5510541144467114, |
| "grad_norm": 0.19051724672317505, |
| "learning_rate": 9.007395873880889e-05, |
| "loss": 0.9556, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.5514427280676187, |
| "grad_norm": 0.21008028090000153, |
| "learning_rate": 8.999610743479954e-05, |
| "loss": 1.0457, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.5518313416885262, |
| "grad_norm": 0.21465444564819336, |
| "learning_rate": 8.991825613079019e-05, |
| "loss": 1.0196, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5522199553094336, |
| "grad_norm": 0.2062770277261734, |
| "learning_rate": 8.984040482678086e-05, |
| "loss": 1.0501, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.552608568930341, |
| "grad_norm": 0.21400012075901031, |
| "learning_rate": 8.976255352277151e-05, |
| "loss": 1.0711, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.5529971825512484, |
| "grad_norm": 0.19617624580860138, |
| "learning_rate": 8.968470221876217e-05, |
| "loss": 0.9858, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.5533857961721559, |
| "grad_norm": 0.20835624635219574, |
| "learning_rate": 8.960685091475283e-05, |
| "loss": 1.0122, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.5537744097930632, |
| "grad_norm": 0.21708111464977264, |
| "learning_rate": 8.952899961074348e-05, |
| "loss": 1.0108, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5541630234139706, |
| "grad_norm": 0.20877864956855774, |
| "learning_rate": 8.945114830673414e-05, |
| "loss": 1.0389, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.5545516370348781, |
| "grad_norm": 0.1924441158771515, |
| "learning_rate": 8.93732970027248e-05, |
| "loss": 1.0088, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.5549402506557854, |
| "grad_norm": 0.20288826525211334, |
| "learning_rate": 8.929544569871546e-05, |
| "loss": 1.0296, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.5553288642766929, |
| "grad_norm": 0.2008143663406372, |
| "learning_rate": 8.921759439470611e-05, |
| "loss": 1.0521, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.5557174778976003, |
| "grad_norm": 0.24407047033309937, |
| "learning_rate": 8.913974309069677e-05, |
| "loss": 1.1038, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5561060915185078, |
| "grad_norm": 0.2172536998987198, |
| "learning_rate": 8.906189178668743e-05, |
| "loss": 1.0811, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.5564947051394151, |
| "grad_norm": 0.21712054312229156, |
| "learning_rate": 8.898404048267808e-05, |
| "loss": 1.0642, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.5568833187603226, |
| "grad_norm": 0.22482797503471375, |
| "learning_rate": 8.890618917866875e-05, |
| "loss": 1.0742, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.55727193238123, |
| "grad_norm": 0.1974876970052719, |
| "learning_rate": 8.88283378746594e-05, |
| "loss": 0.9954, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.5576605460021373, |
| "grad_norm": 0.19162166118621826, |
| "learning_rate": 8.875048657065007e-05, |
| "loss": 1.0074, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5580491596230448, |
| "grad_norm": 0.20439045131206512, |
| "learning_rate": 8.867263526664072e-05, |
| "loss": 1.026, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.5584377732439522, |
| "grad_norm": 0.1947651207447052, |
| "learning_rate": 8.859478396263138e-05, |
| "loss": 0.9848, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.5588263868648596, |
| "grad_norm": 0.21434316039085388, |
| "learning_rate": 8.851693265862204e-05, |
| "loss": 1.0843, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.559215000485767, |
| "grad_norm": 1.3314417600631714, |
| "learning_rate": 8.84390813546127e-05, |
| "loss": 1.0356, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.5596036141066745, |
| "grad_norm": 0.20131289958953857, |
| "learning_rate": 8.836123005060335e-05, |
| "loss": 1.0214, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5599922277275818, |
| "grad_norm": 0.21596461534500122, |
| "learning_rate": 8.828337874659402e-05, |
| "loss": 1.0962, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.5603808413484893, |
| "grad_norm": 0.20477193593978882, |
| "learning_rate": 8.820552744258467e-05, |
| "loss": 1.0643, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.5607694549693967, |
| "grad_norm": 0.1978107988834381, |
| "learning_rate": 8.812767613857532e-05, |
| "loss": 1.0054, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.561158068590304, |
| "grad_norm": 0.219422847032547, |
| "learning_rate": 8.804982483456599e-05, |
| "loss": 1.0009, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.5615466822112115, |
| "grad_norm": 0.21489015221595764, |
| "learning_rate": 8.797197353055664e-05, |
| "loss": 1.052, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5619352958321189, |
| "grad_norm": 0.2235930860042572, |
| "learning_rate": 8.78941222265473e-05, |
| "loss": 1.037, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.5623239094530263, |
| "grad_norm": 0.19922038912773132, |
| "learning_rate": 8.781627092253796e-05, |
| "loss": 1.0006, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.5627125230739337, |
| "grad_norm": 0.24740247428417206, |
| "learning_rate": 8.773841961852861e-05, |
| "loss": 1.0753, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.5631011366948412, |
| "grad_norm": 0.2148803174495697, |
| "learning_rate": 8.766056831451928e-05, |
| "loss": 1.0712, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.5634897503157485, |
| "grad_norm": 0.19838745892047882, |
| "learning_rate": 8.758271701050993e-05, |
| "loss": 1.027, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.563878363936656, |
| "grad_norm": 0.20328201353549957, |
| "learning_rate": 8.750486570650058e-05, |
| "loss": 1.0117, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.5642669775575634, |
| "grad_norm": 0.21230114996433258, |
| "learning_rate": 8.742701440249125e-05, |
| "loss": 1.0658, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.5646555911784708, |
| "grad_norm": 0.2030259519815445, |
| "learning_rate": 8.73491630984819e-05, |
| "loss": 1.0002, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.5650442047993782, |
| "grad_norm": 0.21404659748077393, |
| "learning_rate": 8.727131179447256e-05, |
| "loss": 1.0572, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.5654328184202856, |
| "grad_norm": 0.2148464322090149, |
| "learning_rate": 8.719346049046322e-05, |
| "loss": 1.0164, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5658214320411931, |
| "grad_norm": 0.22083118557929993, |
| "learning_rate": 8.711560918645387e-05, |
| "loss": 0.9704, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.5662100456621004, |
| "grad_norm": 0.19305935502052307, |
| "learning_rate": 8.703775788244453e-05, |
| "loss": 1.0034, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.5665986592830079, |
| "grad_norm": 0.2100098729133606, |
| "learning_rate": 8.695990657843518e-05, |
| "loss": 1.0907, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.5669872729039153, |
| "grad_norm": 0.18947799503803253, |
| "learning_rate": 8.688205527442585e-05, |
| "loss": 0.9664, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "grad_norm": 0.22341710329055786, |
| "learning_rate": 8.68042039704165e-05, |
| "loss": 1.0551, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5677645001457301, |
| "grad_norm": 0.219679057598114, |
| "learning_rate": 8.672635266640717e-05, |
| "loss": 1.0398, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.5681531137666376, |
| "grad_norm": 0.22389841079711914, |
| "learning_rate": 8.664850136239782e-05, |
| "loss": 1.0472, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.5685417273875449, |
| "grad_norm": 0.21402975916862488, |
| "learning_rate": 8.657065005838849e-05, |
| "loss": 1.0224, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.5689303410084523, |
| "grad_norm": 0.20917154848575592, |
| "learning_rate": 8.649279875437915e-05, |
| "loss": 1.0526, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.5693189546293598, |
| "grad_norm": 0.2252056896686554, |
| "learning_rate": 8.64149474503698e-05, |
| "loss": 1.1064, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5697075682502671, |
| "grad_norm": 0.21834802627563477, |
| "learning_rate": 8.633709614636046e-05, |
| "loss": 1.0318, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.5700961818711746, |
| "grad_norm": 0.21882353723049164, |
| "learning_rate": 8.625924484235112e-05, |
| "loss": 1.0285, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.570484795492082, |
| "grad_norm": 0.2028426229953766, |
| "learning_rate": 8.618139353834177e-05, |
| "loss": 1.0356, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.5708734091129894, |
| "grad_norm": 0.22297166287899017, |
| "learning_rate": 8.610354223433243e-05, |
| "loss": 1.0804, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.5712620227338968, |
| "grad_norm": 0.21775268018245697, |
| "learning_rate": 8.602569093032309e-05, |
| "loss": 0.9978, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5716506363548043, |
| "grad_norm": 0.20362353324890137, |
| "learning_rate": 8.594783962631374e-05, |
| "loss": 0.9982, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.5720392499757117, |
| "grad_norm": 0.21854591369628906, |
| "learning_rate": 8.586998832230441e-05, |
| "loss": 1.0465, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.572427863596619, |
| "grad_norm": 0.20501428842544556, |
| "learning_rate": 8.579213701829506e-05, |
| "loss": 1.0468, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.5728164772175265, |
| "grad_norm": 0.21606214344501495, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 1.0477, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.5732050908384339, |
| "grad_norm": 0.2100660502910614, |
| "learning_rate": 8.563643441027638e-05, |
| "loss": 1.0071, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5735937044593413, |
| "grad_norm": 0.21008896827697754, |
| "learning_rate": 8.555858310626703e-05, |
| "loss": 0.9914, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5739823180802487, |
| "grad_norm": 0.22192159295082092, |
| "learning_rate": 8.54807318022577e-05, |
| "loss": 1.0385, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5743709317011562, |
| "grad_norm": 0.20123356580734253, |
| "learning_rate": 8.540288049824835e-05, |
| "loss": 1.0062, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.5747595453220635, |
| "grad_norm": 0.201947420835495, |
| "learning_rate": 8.5325029194239e-05, |
| "loss": 1.0218, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.575148158942971, |
| "grad_norm": 0.22804415225982666, |
| "learning_rate": 8.524717789022967e-05, |
| "loss": 1.0445, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5755367725638784, |
| "grad_norm": 0.20527036488056183, |
| "learning_rate": 8.516932658622032e-05, |
| "loss": 0.9972, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.5759253861847857, |
| "grad_norm": 0.20298773050308228, |
| "learning_rate": 8.509147528221098e-05, |
| "loss": 1.0272, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.5763139998056932, |
| "grad_norm": 0.22500957548618317, |
| "learning_rate": 8.501362397820164e-05, |
| "loss": 1.0982, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.5767026134266006, |
| "grad_norm": 0.1950521320104599, |
| "learning_rate": 8.493577267419229e-05, |
| "loss": 0.9848, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.577091227047508, |
| "grad_norm": 0.21087585389614105, |
| "learning_rate": 8.485792137018295e-05, |
| "loss": 1.0125, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.5774798406684154, |
| "grad_norm": 0.20122238993644714, |
| "learning_rate": 8.47800700661736e-05, |
| "loss": 1.0533, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.5778684542893229, |
| "grad_norm": 0.20149008929729462, |
| "learning_rate": 8.470221876216427e-05, |
| "loss": 1.0719, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.5782570679102302, |
| "grad_norm": 0.21307213604450226, |
| "learning_rate": 8.462436745815494e-05, |
| "loss": 1.0522, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5786456815311377, |
| "grad_norm": 0.21828554570674896, |
| "learning_rate": 8.454651615414559e-05, |
| "loss": 1.0184, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5790342951520451, |
| "grad_norm": 0.22002705931663513, |
| "learning_rate": 8.446866485013625e-05, |
| "loss": 1.0101, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5794229087729524, |
| "grad_norm": 0.19479142129421234, |
| "learning_rate": 8.43908135461269e-05, |
| "loss": 0.9889, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5798115223938599, |
| "grad_norm": 0.21346086263656616, |
| "learning_rate": 8.431296224211756e-05, |
| "loss": 1.0373, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.5802001360147673, |
| "grad_norm": 0.20177558064460754, |
| "learning_rate": 8.423511093810822e-05, |
| "loss": 1.0215, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5805887496356748, |
| "grad_norm": 0.2117915153503418, |
| "learning_rate": 8.415725963409888e-05, |
| "loss": 1.0321, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.5809773632565821, |
| "grad_norm": 0.21304374933242798, |
| "learning_rate": 8.407940833008954e-05, |
| "loss": 1.0123, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5813659768774896, |
| "grad_norm": 0.21173715591430664, |
| "learning_rate": 8.400155702608019e-05, |
| "loss": 1.0696, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.581754590498397, |
| "grad_norm": 0.20407019555568695, |
| "learning_rate": 8.392370572207085e-05, |
| "loss": 1.0086, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5821432041193044, |
| "grad_norm": 0.209481880068779, |
| "learning_rate": 8.384585441806151e-05, |
| "loss": 0.9975, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.5825318177402118, |
| "grad_norm": 0.22184531390666962, |
| "learning_rate": 8.376800311405216e-05, |
| "loss": 1.0956, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.5829204313611193, |
| "grad_norm": 0.21344684064388275, |
| "learning_rate": 8.369015181004283e-05, |
| "loss": 1.0685, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5833090449820266, |
| "grad_norm": 0.19837221503257751, |
| "learning_rate": 8.361230050603348e-05, |
| "loss": 1.0149, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.583697658602934, |
| "grad_norm": 0.2133672833442688, |
| "learning_rate": 8.353444920202413e-05, |
| "loss": 1.0453, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.5840862722238415, |
| "grad_norm": 0.21944090723991394, |
| "learning_rate": 8.34565978980148e-05, |
| "loss": 1.04, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.5844748858447488, |
| "grad_norm": 0.1983667016029358, |
| "learning_rate": 8.337874659400545e-05, |
| "loss": 0.9919, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.5848634994656563, |
| "grad_norm": 0.2025303989648819, |
| "learning_rate": 8.33008952899961e-05, |
| "loss": 1.0021, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5852521130865637, |
| "grad_norm": 0.2015170007944107, |
| "learning_rate": 8.322304398598677e-05, |
| "loss": 0.9945, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5856407267074711, |
| "grad_norm": 0.20768272876739502, |
| "learning_rate": 8.314519268197742e-05, |
| "loss": 1.0465, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5860293403283785, |
| "grad_norm": 0.20513412356376648, |
| "learning_rate": 8.306734137796809e-05, |
| "loss": 1.0124, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.586417953949286, |
| "grad_norm": 0.20268471539020538, |
| "learning_rate": 8.298949007395874e-05, |
| "loss": 1.0586, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.5868065675701933, |
| "grad_norm": 0.20915938913822174, |
| "learning_rate": 8.291163876994939e-05, |
| "loss": 1.0047, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5871951811911007, |
| "grad_norm": 0.2161451131105423, |
| "learning_rate": 8.283378746594006e-05, |
| "loss": 1.0184, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.5875837948120082, |
| "grad_norm": 0.1915571093559265, |
| "learning_rate": 8.275593616193071e-05, |
| "loss": 1.0187, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5879724084329155, |
| "grad_norm": 0.20907992124557495, |
| "learning_rate": 8.267808485792137e-05, |
| "loss": 1.0212, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.588361022053823, |
| "grad_norm": 0.20140786468982697, |
| "learning_rate": 8.260023355391204e-05, |
| "loss": 1.014, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5887496356747304, |
| "grad_norm": 0.208252415060997, |
| "learning_rate": 8.252238224990269e-05, |
| "loss": 1.0806, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5891382492956379, |
| "grad_norm": 0.20596125721931458, |
| "learning_rate": 8.244453094589336e-05, |
| "loss": 0.9823, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.5895268629165452, |
| "grad_norm": 0.18832452595233917, |
| "learning_rate": 8.236667964188401e-05, |
| "loss": 0.9925, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5899154765374527, |
| "grad_norm": 0.2078334391117096, |
| "learning_rate": 8.228882833787467e-05, |
| "loss": 1.0587, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.59030409015836, |
| "grad_norm": 0.20121365785598755, |
| "learning_rate": 8.221097703386533e-05, |
| "loss": 1.0607, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5906927037792674, |
| "grad_norm": 0.19666099548339844, |
| "learning_rate": 8.213312572985598e-05, |
| "loss": 1.0124, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5910813174001749, |
| "grad_norm": 0.20176006853580475, |
| "learning_rate": 8.205527442584664e-05, |
| "loss": 1.0297, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5914699310210823, |
| "grad_norm": 0.2038574516773224, |
| "learning_rate": 8.19774231218373e-05, |
| "loss": 1.0311, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.5918585446419897, |
| "grad_norm": 0.19517424702644348, |
| "learning_rate": 8.189957181782796e-05, |
| "loss": 0.9945, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5922471582628971, |
| "grad_norm": 0.19599094986915588, |
| "learning_rate": 8.182172051381861e-05, |
| "loss": 1.0255, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.5926357718838046, |
| "grad_norm": 0.21409402787685394, |
| "learning_rate": 8.174386920980927e-05, |
| "loss": 1.0868, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.5930243855047119, |
| "grad_norm": 0.19567830860614777, |
| "learning_rate": 8.166601790579993e-05, |
| "loss": 0.9654, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.5934129991256194, |
| "grad_norm": 0.2275007963180542, |
| "learning_rate": 8.158816660179058e-05, |
| "loss": 1.0867, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.5938016127465268, |
| "grad_norm": 0.19826427102088928, |
| "learning_rate": 8.151031529778123e-05, |
| "loss": 1.0301, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.5941902263674341, |
| "grad_norm": 0.2051352709531784, |
| "learning_rate": 8.14324639937719e-05, |
| "loss": 1.023, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.5945788399883416, |
| "grad_norm": 0.19492043554782867, |
| "learning_rate": 8.135461268976255e-05, |
| "loss": 0.9608, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.594967453609249, |
| "grad_norm": 0.21521608531475067, |
| "learning_rate": 8.127676138575322e-05, |
| "loss": 1.0612, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.5953560672301564, |
| "grad_norm": 0.22739367187023163, |
| "learning_rate": 8.119891008174387e-05, |
| "loss": 1.0603, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 0.20334595441818237, |
| "learning_rate": 8.112105877773452e-05, |
| "loss": 1.0191, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.5961332944719713, |
| "grad_norm": 0.20985397696495056, |
| "learning_rate": 8.104320747372519e-05, |
| "loss": 1.0721, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.5965219080928786, |
| "grad_norm": 0.20472954213619232, |
| "learning_rate": 8.096535616971584e-05, |
| "loss": 1.0556, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.5969105217137861, |
| "grad_norm": 0.2112964689731598, |
| "learning_rate": 8.08875048657065e-05, |
| "loss": 1.0016, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.5972991353346935, |
| "grad_norm": 0.21330617368221283, |
| "learning_rate": 8.080965356169716e-05, |
| "loss": 1.0783, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.5976877489556008, |
| "grad_norm": 0.20907814800739288, |
| "learning_rate": 8.073180225768782e-05, |
| "loss": 1.071, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.5980763625765083, |
| "grad_norm": 0.2038964033126831, |
| "learning_rate": 8.065395095367848e-05, |
| "loss": 1.0039, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.5984649761974157, |
| "grad_norm": 0.2175542712211609, |
| "learning_rate": 8.057609964966914e-05, |
| "loss": 1.0015, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5988535898183232, |
| "grad_norm": 0.21474529802799225, |
| "learning_rate": 8.049824834565979e-05, |
| "loss": 1.0273, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.5992422034392305, |
| "grad_norm": 0.21428482234477997, |
| "learning_rate": 8.042039704165046e-05, |
| "loss": 1.0767, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.599630817060138, |
| "grad_norm": 0.20287524163722992, |
| "learning_rate": 8.034254573764111e-05, |
| "loss": 1.064, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.6000194306810454, |
| "grad_norm": 0.20689848065376282, |
| "learning_rate": 8.026469443363178e-05, |
| "loss": 1.0084, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.6004080443019528, |
| "grad_norm": 0.22451332211494446, |
| "learning_rate": 8.018684312962243e-05, |
| "loss": 1.1039, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.6007966579228602, |
| "grad_norm": 0.21381956338882446, |
| "learning_rate": 8.010899182561308e-05, |
| "loss": 1.0551, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.6011852715437677, |
| "grad_norm": 0.20108483731746674, |
| "learning_rate": 8.003114052160375e-05, |
| "loss": 1.0326, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.601573885164675, |
| "grad_norm": 0.19739678502082825, |
| "learning_rate": 7.99532892175944e-05, |
| "loss": 1.0319, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.6019624987855824, |
| "grad_norm": 0.21635359525680542, |
| "learning_rate": 7.987543791358506e-05, |
| "loss": 1.0465, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.6023511124064899, |
| "grad_norm": 0.1949319988489151, |
| "learning_rate": 7.979758660957572e-05, |
| "loss": 1.0026, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6027397260273972, |
| "grad_norm": 0.1989699900150299, |
| "learning_rate": 7.971973530556637e-05, |
| "loss": 1.021, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.6031283396483047, |
| "grad_norm": 0.24031391739845276, |
| "learning_rate": 7.964188400155703e-05, |
| "loss": 1.0293, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.6035169532692121, |
| "grad_norm": 0.21247251331806183, |
| "learning_rate": 7.956403269754769e-05, |
| "loss": 1.023, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.6039055668901195, |
| "grad_norm": 0.21565628051757812, |
| "learning_rate": 7.948618139353835e-05, |
| "loss": 1.1027, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.6042941805110269, |
| "grad_norm": 0.21207931637763977, |
| "learning_rate": 7.9408330089529e-05, |
| "loss": 1.0634, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.6046827941319344, |
| "grad_norm": 0.21354155242443085, |
| "learning_rate": 7.933047878551965e-05, |
| "loss": 1.0433, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.6050714077528417, |
| "grad_norm": 0.21708370745182037, |
| "learning_rate": 7.925262748151032e-05, |
| "loss": 1.0499, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.6054600213737491, |
| "grad_norm": 0.2051447182893753, |
| "learning_rate": 7.917477617750097e-05, |
| "loss": 1.0042, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.6058486349946566, |
| "grad_norm": 0.18768000602722168, |
| "learning_rate": 7.909692487349164e-05, |
| "loss": 1.009, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.6062372486155639, |
| "grad_norm": 0.2142931967973709, |
| "learning_rate": 7.901907356948229e-05, |
| "loss": 1.0458, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6066258622364714, |
| "grad_norm": 0.21006444096565247, |
| "learning_rate": 7.894122226547294e-05, |
| "loss": 1.0286, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.6070144758573788, |
| "grad_norm": 0.2187039703130722, |
| "learning_rate": 7.886337096146361e-05, |
| "loss": 1.0103, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.6074030894782863, |
| "grad_norm": 0.19863669574260712, |
| "learning_rate": 7.878551965745426e-05, |
| "loss": 0.9925, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.6077917030991936, |
| "grad_norm": 0.21771976351737976, |
| "learning_rate": 7.870766835344493e-05, |
| "loss": 0.9853, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.6081803167201011, |
| "grad_norm": 0.21714983880519867, |
| "learning_rate": 7.862981704943558e-05, |
| "loss": 1.0123, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.6085689303410085, |
| "grad_norm": 0.2251398265361786, |
| "learning_rate": 7.855196574542624e-05, |
| "loss": 1.0265, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.6089575439619158, |
| "grad_norm": 0.22089716792106628, |
| "learning_rate": 7.84741144414169e-05, |
| "loss": 1.0689, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.6093461575828233, |
| "grad_norm": 0.2453841269016266, |
| "learning_rate": 7.839626313740756e-05, |
| "loss": 1.0185, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.6097347712037307, |
| "grad_norm": 0.21866528689861298, |
| "learning_rate": 7.831841183339821e-05, |
| "loss": 1.0361, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.6101233848246381, |
| "grad_norm": 0.22421486675739288, |
| "learning_rate": 7.824056052938888e-05, |
| "loss": 1.024, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6105119984455455, |
| "grad_norm": 0.21107137203216553, |
| "learning_rate": 7.816270922537953e-05, |
| "loss": 1.0335, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.610900612066453, |
| "grad_norm": 0.20731772482395172, |
| "learning_rate": 7.80848579213702e-05, |
| "loss": 1.0563, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.6112892256873603, |
| "grad_norm": 0.19535884261131287, |
| "learning_rate": 7.800700661736085e-05, |
| "loss": 0.9698, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.6116778393082678, |
| "grad_norm": 0.20449021458625793, |
| "learning_rate": 7.79291553133515e-05, |
| "loss": 1.0125, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.6120664529291752, |
| "grad_norm": 0.19576509296894073, |
| "learning_rate": 7.785130400934217e-05, |
| "loss": 0.9326, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.6124550665500825, |
| "grad_norm": 0.18914124369621277, |
| "learning_rate": 7.777345270533282e-05, |
| "loss": 0.9939, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.61284368017099, |
| "grad_norm": 0.21239091455936432, |
| "learning_rate": 7.769560140132348e-05, |
| "loss": 1.0271, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.6132322937918974, |
| "grad_norm": 0.22204811871051788, |
| "learning_rate": 7.761775009731414e-05, |
| "loss": 1.0524, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.6136209074128048, |
| "grad_norm": 0.20047850906848907, |
| "learning_rate": 7.753989879330479e-05, |
| "loss": 1.0076, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.6140095210337122, |
| "grad_norm": 0.22619746625423431, |
| "learning_rate": 7.746204748929545e-05, |
| "loss": 1.0611, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6143981346546197, |
| "grad_norm": 0.2500879466533661, |
| "learning_rate": 7.73841961852861e-05, |
| "loss": 1.0364, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.614786748275527, |
| "grad_norm": 0.23486928641796112, |
| "learning_rate": 7.730634488127676e-05, |
| "loss": 1.0472, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.6151753618964345, |
| "grad_norm": 0.19849038124084473, |
| "learning_rate": 7.722849357726742e-05, |
| "loss": 0.9847, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.6155639755173419, |
| "grad_norm": 0.21516263484954834, |
| "learning_rate": 7.715064227325807e-05, |
| "loss": 1.0351, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.6159525891382492, |
| "grad_norm": 0.20137760043144226, |
| "learning_rate": 7.707279096924874e-05, |
| "loss": 0.9879, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.6163412027591567, |
| "grad_norm": 0.2146228402853012, |
| "learning_rate": 7.699493966523939e-05, |
| "loss": 1.0792, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.6167298163800641, |
| "grad_norm": 0.19929760694503784, |
| "learning_rate": 7.691708836123004e-05, |
| "loss": 1.0313, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.6171184300009716, |
| "grad_norm": 0.201123908162117, |
| "learning_rate": 7.683923705722071e-05, |
| "loss": 1.0279, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.6175070436218789, |
| "grad_norm": 0.2154105007648468, |
| "learning_rate": 7.676138575321136e-05, |
| "loss": 1.075, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.6178956572427864, |
| "grad_norm": 0.2028442770242691, |
| "learning_rate": 7.668353444920203e-05, |
| "loss": 0.9771, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6182842708636938, |
| "grad_norm": 0.18003074824810028, |
| "learning_rate": 7.660568314519268e-05, |
| "loss": 0.9677, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.6186728844846012, |
| "grad_norm": 0.23250891268253326, |
| "learning_rate": 7.652783184118335e-05, |
| "loss": 1.015, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.6190614981055086, |
| "grad_norm": 0.2047244906425476, |
| "learning_rate": 7.6449980537174e-05, |
| "loss": 1.0044, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.6194501117264161, |
| "grad_norm": 0.20011259615421295, |
| "learning_rate": 7.637212923316466e-05, |
| "loss": 1.0089, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.6198387253473234, |
| "grad_norm": 0.2212608903646469, |
| "learning_rate": 7.629427792915533e-05, |
| "loss": 1.0457, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.6202273389682308, |
| "grad_norm": 0.22725115716457367, |
| "learning_rate": 7.621642662514598e-05, |
| "loss": 1.1198, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.6206159525891383, |
| "grad_norm": 0.2065306007862091, |
| "learning_rate": 7.613857532113663e-05, |
| "loss": 1.0572, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.6210045662100456, |
| "grad_norm": 0.2132783830165863, |
| "learning_rate": 7.60607240171273e-05, |
| "loss": 1.0332, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.6213931798309531, |
| "grad_norm": 0.20527103543281555, |
| "learning_rate": 7.598287271311795e-05, |
| "loss": 1.0156, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.6217817934518605, |
| "grad_norm": 0.23608024418354034, |
| "learning_rate": 7.59050214091086e-05, |
| "loss": 1.0379, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6221704070727679, |
| "grad_norm": 0.22227297723293304, |
| "learning_rate": 7.582717010509927e-05, |
| "loss": 1.0507, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.6225590206936753, |
| "grad_norm": 0.22359615564346313, |
| "learning_rate": 7.574931880108992e-05, |
| "loss": 1.0705, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.6229476343145828, |
| "grad_norm": 0.20478755235671997, |
| "learning_rate": 7.567146749708059e-05, |
| "loss": 1.0309, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.6233362479354901, |
| "grad_norm": 0.2223423272371292, |
| "learning_rate": 7.559361619307124e-05, |
| "loss": 1.0386, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.6237248615563975, |
| "grad_norm": 0.21232105791568756, |
| "learning_rate": 7.551576488906189e-05, |
| "loss": 1.0353, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.624113475177305, |
| "grad_norm": 0.22431129217147827, |
| "learning_rate": 7.543791358505256e-05, |
| "loss": 1.1017, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.6245020887982123, |
| "grad_norm": 0.20826031267642975, |
| "learning_rate": 7.536006228104321e-05, |
| "loss": 1.0172, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.6248907024191198, |
| "grad_norm": 0.2803161144256592, |
| "learning_rate": 7.528221097703387e-05, |
| "loss": 1.0554, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.6252793160400272, |
| "grad_norm": 0.2185174971818924, |
| "learning_rate": 7.520435967302453e-05, |
| "loss": 0.9842, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.6256679296609347, |
| "grad_norm": 0.2091478854417801, |
| "learning_rate": 7.512650836901518e-05, |
| "loss": 0.9783, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.626056543281842, |
| "grad_norm": 0.22342967987060547, |
| "learning_rate": 7.504865706500584e-05, |
| "loss": 0.9891, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.6264451569027495, |
| "grad_norm": 0.195283442735672, |
| "learning_rate": 7.49708057609965e-05, |
| "loss": 0.9654, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.6268337705236569, |
| "grad_norm": 0.21048255264759064, |
| "learning_rate": 7.489295445698716e-05, |
| "loss": 1.0112, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.6272223841445642, |
| "grad_norm": 0.21405541896820068, |
| "learning_rate": 7.481510315297781e-05, |
| "loss": 1.0498, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.6276109977654717, |
| "grad_norm": 0.2144453227519989, |
| "learning_rate": 7.473725184896846e-05, |
| "loss": 1.0487, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.627999611386379, |
| "grad_norm": 0.21963326632976532, |
| "learning_rate": 7.465940054495913e-05, |
| "loss": 1.0634, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.6283882250072865, |
| "grad_norm": 0.20100601017475128, |
| "learning_rate": 7.458154924094978e-05, |
| "loss": 1.0407, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.6287768386281939, |
| "grad_norm": 0.19469478726387024, |
| "learning_rate": 7.450369793694045e-05, |
| "loss": 0.9923, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.6291654522491014, |
| "grad_norm": 0.2114047408103943, |
| "learning_rate": 7.442584663293111e-05, |
| "loss": 1.0263, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.6295540658700087, |
| "grad_norm": 0.21080389618873596, |
| "learning_rate": 7.434799532892177e-05, |
| "loss": 1.0012, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6299426794909162, |
| "grad_norm": 0.20366831123828888, |
| "learning_rate": 7.427014402491243e-05, |
| "loss": 1.0254, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.6303312931118236, |
| "grad_norm": 0.209821879863739, |
| "learning_rate": 7.419229272090308e-05, |
| "loss": 0.9416, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.6307199067327309, |
| "grad_norm": 0.2228868007659912, |
| "learning_rate": 7.411444141689374e-05, |
| "loss": 1.0128, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.6311085203536384, |
| "grad_norm": 0.19673995673656464, |
| "learning_rate": 7.40365901128844e-05, |
| "loss": 0.9709, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.6314971339745458, |
| "grad_norm": 0.21590839326381683, |
| "learning_rate": 7.395873880887505e-05, |
| "loss": 1.0251, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.6318857475954532, |
| "grad_norm": 0.20200593769550323, |
| "learning_rate": 7.388088750486572e-05, |
| "loss": 1.0307, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.6322743612163606, |
| "grad_norm": 0.19623909890651703, |
| "learning_rate": 7.380303620085637e-05, |
| "loss": 1.0375, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.6326629748372681, |
| "grad_norm": 0.19878128170967102, |
| "learning_rate": 7.372518489684702e-05, |
| "loss": 0.9844, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.6330515884581754, |
| "grad_norm": 0.21292422711849213, |
| "learning_rate": 7.364733359283769e-05, |
| "loss": 1.0228, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.6334402020790829, |
| "grad_norm": 0.1915559619665146, |
| "learning_rate": 7.356948228882834e-05, |
| "loss": 0.9818, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6338288156999903, |
| "grad_norm": 0.2264430969953537, |
| "learning_rate": 7.3491630984819e-05, |
| "loss": 1.146, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.6342174293208978, |
| "grad_norm": 0.19332270324230194, |
| "learning_rate": 7.341377968080966e-05, |
| "loss": 1.0007, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.6346060429418051, |
| "grad_norm": 0.217147096991539, |
| "learning_rate": 7.333592837680031e-05, |
| "loss": 1.0498, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.6349946565627125, |
| "grad_norm": 0.22200679779052734, |
| "learning_rate": 7.325807707279098e-05, |
| "loss": 1.0358, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.63538327018362, |
| "grad_norm": 0.19485117495059967, |
| "learning_rate": 7.318022576878163e-05, |
| "loss": 0.9717, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.6357718838045273, |
| "grad_norm": 0.20595680177211761, |
| "learning_rate": 7.310237446477228e-05, |
| "loss": 1.0195, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.6361604974254348, |
| "grad_norm": 0.21184709668159485, |
| "learning_rate": 7.302452316076294e-05, |
| "loss": 1.0354, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.6365491110463422, |
| "grad_norm": 0.22607794404029846, |
| "learning_rate": 7.29466718567536e-05, |
| "loss": 1.0217, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.6369377246672496, |
| "grad_norm": 0.20236065983772278, |
| "learning_rate": 7.286882055274426e-05, |
| "loss": 1.0441, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.637326338288157, |
| "grad_norm": 0.19979622960090637, |
| "learning_rate": 7.279096924873491e-05, |
| "loss": 1.0105, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6377149519090645, |
| "grad_norm": 0.2655459940433502, |
| "learning_rate": 7.271311794472557e-05, |
| "loss": 1.0726, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.6381035655299718, |
| "grad_norm": 0.25107496976852417, |
| "learning_rate": 7.263526664071623e-05, |
| "loss": 1.037, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.6384921791508792, |
| "grad_norm": 0.19250229001045227, |
| "learning_rate": 7.255741533670688e-05, |
| "loss": 0.9741, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.6388807927717867, |
| "grad_norm": 0.19324181973934174, |
| "learning_rate": 7.247956403269755e-05, |
| "loss": 1.0333, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.639269406392694, |
| "grad_norm": 0.22267483174800873, |
| "learning_rate": 7.240171272868822e-05, |
| "loss": 1.0313, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.6396580200136015, |
| "grad_norm": 0.2775348722934723, |
| "learning_rate": 7.232386142467887e-05, |
| "loss": 1.0686, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.6400466336345089, |
| "grad_norm": 0.1886623501777649, |
| "learning_rate": 7.224601012066953e-05, |
| "loss": 1.0029, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.6404352472554163, |
| "grad_norm": 0.20303374528884888, |
| "learning_rate": 7.216815881666019e-05, |
| "loss": 1.0346, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.6408238608763237, |
| "grad_norm": 0.20815756916999817, |
| "learning_rate": 7.209030751265085e-05, |
| "loss": 1.0258, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.6412124744972312, |
| "grad_norm": 0.22055703401565552, |
| "learning_rate": 7.20124562086415e-05, |
| "loss": 1.0215, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6416010881181385, |
| "grad_norm": 0.20248562097549438, |
| "learning_rate": 7.193460490463215e-05, |
| "loss": 0.9979, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.6419897017390459, |
| "grad_norm": 0.2093247026205063, |
| "learning_rate": 7.185675360062282e-05, |
| "loss": 1.0605, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.6423783153599534, |
| "grad_norm": 0.22276204824447632, |
| "learning_rate": 7.177890229661347e-05, |
| "loss": 1.0788, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.6427669289808607, |
| "grad_norm": 0.19959624111652374, |
| "learning_rate": 7.170105099260412e-05, |
| "loss": 0.9954, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.6431555426017682, |
| "grad_norm": 0.20173248648643494, |
| "learning_rate": 7.162319968859479e-05, |
| "loss": 1.003, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.6435441562226756, |
| "grad_norm": 0.207533061504364, |
| "learning_rate": 7.154534838458544e-05, |
| "loss": 1.043, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.643932769843583, |
| "grad_norm": 0.21928350627422333, |
| "learning_rate": 7.146749708057611e-05, |
| "loss": 1.0472, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.6443213834644904, |
| "grad_norm": 0.2567078173160553, |
| "learning_rate": 7.138964577656676e-05, |
| "loss": 1.0946, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.6447099970853979, |
| "grad_norm": 0.19454176723957062, |
| "learning_rate": 7.131179447255741e-05, |
| "loss": 0.9437, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.6450986107063053, |
| "grad_norm": 0.19198423624038696, |
| "learning_rate": 7.123394316854808e-05, |
| "loss": 0.9976, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6454872243272126, |
| "grad_norm": 0.1929445117712021, |
| "learning_rate": 7.115609186453873e-05, |
| "loss": 1.0279, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.6458758379481201, |
| "grad_norm": 0.2041027694940567, |
| "learning_rate": 7.10782405605294e-05, |
| "loss": 1.0458, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.6462644515690275, |
| "grad_norm": 0.23750995099544525, |
| "learning_rate": 7.100038925652005e-05, |
| "loss": 1.0916, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.6466530651899349, |
| "grad_norm": 0.1971994787454605, |
| "learning_rate": 7.09225379525107e-05, |
| "loss": 0.951, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.6470416788108423, |
| "grad_norm": 0.20459246635437012, |
| "learning_rate": 7.084468664850136e-05, |
| "loss": 0.9653, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.6474302924317498, |
| "grad_norm": 0.2137187272310257, |
| "learning_rate": 7.076683534449202e-05, |
| "loss": 1.0291, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.6478189060526571, |
| "grad_norm": 0.21235258877277374, |
| "learning_rate": 7.068898404048268e-05, |
| "loss": 1.0104, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.6482075196735646, |
| "grad_norm": 0.23120944201946259, |
| "learning_rate": 7.061113273647333e-05, |
| "loss": 1.0693, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.648596133294472, |
| "grad_norm": 1.38257896900177, |
| "learning_rate": 7.053328143246399e-05, |
| "loss": 1.0339, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.6489847469153793, |
| "grad_norm": 0.20898790657520294, |
| "learning_rate": 7.045543012845465e-05, |
| "loss": 1.004, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6493733605362868, |
| "grad_norm": 0.20251236855983734, |
| "learning_rate": 7.037757882444532e-05, |
| "loss": 0.9992, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.6497619741571942, |
| "grad_norm": 0.2358030527830124, |
| "learning_rate": 7.029972752043597e-05, |
| "loss": 0.9854, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.6501505877781016, |
| "grad_norm": 0.18945704400539398, |
| "learning_rate": 7.022187621642664e-05, |
| "loss": 0.9677, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.650539201399009, |
| "grad_norm": 0.1965213567018509, |
| "learning_rate": 7.014402491241729e-05, |
| "loss": 1.0118, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.6509278150199165, |
| "grad_norm": 0.2340148687362671, |
| "learning_rate": 7.006617360840795e-05, |
| "loss": 1.0312, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.6513164286408238, |
| "grad_norm": 0.1992296278476715, |
| "learning_rate": 6.99883223043986e-05, |
| "loss": 1.0155, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.6517050422617313, |
| "grad_norm": 0.20410223305225372, |
| "learning_rate": 6.991047100038926e-05, |
| "loss": 1.0646, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.6520936558826387, |
| "grad_norm": 0.19254536926746368, |
| "learning_rate": 6.983261969637992e-05, |
| "loss": 0.9538, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.6524822695035462, |
| "grad_norm": 0.19980847835540771, |
| "learning_rate": 6.975476839237057e-05, |
| "loss": 0.9912, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.6528708831244535, |
| "grad_norm": 0.19503261148929596, |
| "learning_rate": 6.967691708836124e-05, |
| "loss": 0.9844, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6532594967453609, |
| "grad_norm": 0.22375883162021637, |
| "learning_rate": 6.959906578435189e-05, |
| "loss": 1.1266, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.6536481103662684, |
| "grad_norm": 0.21456514298915863, |
| "learning_rate": 6.952121448034254e-05, |
| "loss": 1.0902, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.6540367239871757, |
| "grad_norm": 0.20348122715950012, |
| "learning_rate": 6.944336317633321e-05, |
| "loss": 1.0228, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.6544253376080832, |
| "grad_norm": 0.21647393703460693, |
| "learning_rate": 6.936551187232386e-05, |
| "loss": 1.0653, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.6548139512289906, |
| "grad_norm": 0.20160923898220062, |
| "learning_rate": 6.928766056831453e-05, |
| "loss": 1.0249, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.655202564849898, |
| "grad_norm": 0.20070499181747437, |
| "learning_rate": 6.920980926430518e-05, |
| "loss": 1.0585, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.6555911784708054, |
| "grad_norm": 0.2656902074813843, |
| "learning_rate": 6.913195796029583e-05, |
| "loss": 1.0042, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.6559797920917129, |
| "grad_norm": 0.1934545785188675, |
| "learning_rate": 6.90541066562865e-05, |
| "loss": 0.9831, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.6563684057126202, |
| "grad_norm": 0.21719245612621307, |
| "learning_rate": 6.897625535227715e-05, |
| "loss": 0.9934, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.6567570193335276, |
| "grad_norm": 0.20906969904899597, |
| "learning_rate": 6.889840404826782e-05, |
| "loss": 1.023, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6571456329544351, |
| "grad_norm": 0.225227490067482, |
| "learning_rate": 6.882055274425847e-05, |
| "loss": 1.0265, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.6575342465753424, |
| "grad_norm": 0.22766710817813873, |
| "learning_rate": 6.874270144024912e-05, |
| "loss": 1.0306, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.6579228601962499, |
| "grad_norm": 0.20964065194129944, |
| "learning_rate": 6.866485013623978e-05, |
| "loss": 0.9431, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.6583114738171573, |
| "grad_norm": 0.19821231067180634, |
| "learning_rate": 6.858699883223044e-05, |
| "loss": 0.9959, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.6587000874380647, |
| "grad_norm": 0.2071307748556137, |
| "learning_rate": 6.85091475282211e-05, |
| "loss": 1.0332, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.6590887010589721, |
| "grad_norm": 0.27962490916252136, |
| "learning_rate": 6.843129622421175e-05, |
| "loss": 0.9755, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.6594773146798796, |
| "grad_norm": 0.21582698822021484, |
| "learning_rate": 6.835344492020242e-05, |
| "loss": 1.0305, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.6598659283007869, |
| "grad_norm": 0.1872921586036682, |
| "learning_rate": 6.827559361619307e-05, |
| "loss": 0.9693, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.6602545419216943, |
| "grad_norm": 0.27033379673957825, |
| "learning_rate": 6.819774231218374e-05, |
| "loss": 1.0756, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.6606431555426018, |
| "grad_norm": 0.2010008543729782, |
| "learning_rate": 6.811989100817439e-05, |
| "loss": 1.0077, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6610317691635091, |
| "grad_norm": 0.20637495815753937, |
| "learning_rate": 6.804203970416506e-05, |
| "loss": 1.0208, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.6614203827844166, |
| "grad_norm": 0.21331818401813507, |
| "learning_rate": 6.796418840015571e-05, |
| "loss": 1.0242, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.661808996405324, |
| "grad_norm": 0.2092941552400589, |
| "learning_rate": 6.788633709614637e-05, |
| "loss": 1.0949, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.6621976100262315, |
| "grad_norm": 0.22332265973091125, |
| "learning_rate": 6.780848579213703e-05, |
| "loss": 1.1068, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.6625862236471388, |
| "grad_norm": 0.20077067613601685, |
| "learning_rate": 6.773063448812768e-05, |
| "loss": 0.9801, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6629748372680463, |
| "grad_norm": 0.2057008296251297, |
| "learning_rate": 6.765278318411834e-05, |
| "loss": 1.0058, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.6633634508889537, |
| "grad_norm": 0.20337353646755219, |
| "learning_rate": 6.7574931880109e-05, |
| "loss": 1.0141, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.663752064509861, |
| "grad_norm": 0.22756130993366241, |
| "learning_rate": 6.749708057609966e-05, |
| "loss": 1.0287, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.6641406781307685, |
| "grad_norm": 0.2052423506975174, |
| "learning_rate": 6.741922927209031e-05, |
| "loss": 1.0069, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.6645292917516759, |
| "grad_norm": 0.1988023817539215, |
| "learning_rate": 6.734137796808096e-05, |
| "loss": 0.9761, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6649179053725833, |
| "grad_norm": 0.20491188764572144, |
| "learning_rate": 6.726352666407163e-05, |
| "loss": 0.9767, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.6653065189934907, |
| "grad_norm": 0.18790274858474731, |
| "learning_rate": 6.718567536006228e-05, |
| "loss": 0.9944, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.6656951326143982, |
| "grad_norm": 0.19979891180992126, |
| "learning_rate": 6.710782405605293e-05, |
| "loss": 1.0842, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.6660837462353055, |
| "grad_norm": 0.22204813361167908, |
| "learning_rate": 6.70299727520436e-05, |
| "loss": 1.0561, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.666472359856213, |
| "grad_norm": 0.20182965695858002, |
| "learning_rate": 6.695212144803425e-05, |
| "loss": 1.0015, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6668609734771204, |
| "grad_norm": 0.20719997584819794, |
| "learning_rate": 6.687427014402492e-05, |
| "loss": 1.0144, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.6672495870980278, |
| "grad_norm": 0.1944626122713089, |
| "learning_rate": 6.679641884001557e-05, |
| "loss": 1.0083, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.6676382007189352, |
| "grad_norm": 0.2072264701128006, |
| "learning_rate": 6.671856753600622e-05, |
| "loss": 1.0246, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.6680268143398426, |
| "grad_norm": 0.2134973257780075, |
| "learning_rate": 6.664071623199689e-05, |
| "loss": 1.0926, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.66841542796075, |
| "grad_norm": 0.2119186669588089, |
| "learning_rate": 6.656286492798754e-05, |
| "loss": 1.0129, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6688040415816574, |
| "grad_norm": 0.21205540001392365, |
| "learning_rate": 6.64850136239782e-05, |
| "loss": 1.0611, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.6691926552025649, |
| "grad_norm": 0.21632088720798492, |
| "learning_rate": 6.640716231996886e-05, |
| "loss": 1.0821, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.6695812688234722, |
| "grad_norm": 0.21734434366226196, |
| "learning_rate": 6.632931101595952e-05, |
| "loss": 1.0821, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.6699698824443797, |
| "grad_norm": 0.2030603289604187, |
| "learning_rate": 6.625145971195017e-05, |
| "loss": 0.9976, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.6703584960652871, |
| "grad_norm": 0.19921456277370453, |
| "learning_rate": 6.617360840794084e-05, |
| "loss": 0.9187, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6707471096861946, |
| "grad_norm": 0.20548826456069946, |
| "learning_rate": 6.60957571039315e-05, |
| "loss": 1.0486, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.6711357233071019, |
| "grad_norm": 0.21784676611423492, |
| "learning_rate": 6.601790579992216e-05, |
| "loss": 1.1089, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.6715243369280093, |
| "grad_norm": 0.2137753963470459, |
| "learning_rate": 6.594005449591281e-05, |
| "loss": 1.0075, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.6719129505489168, |
| "grad_norm": 0.20200639963150024, |
| "learning_rate": 6.586220319190348e-05, |
| "loss": 0.9915, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.6723015641698241, |
| "grad_norm": 0.20898796617984772, |
| "learning_rate": 6.578435188789413e-05, |
| "loss": 1.0292, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6726901777907316, |
| "grad_norm": 0.22515977919101715, |
| "learning_rate": 6.570650058388478e-05, |
| "loss": 1.0118, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.673078791411639, |
| "grad_norm": 0.2132793813943863, |
| "learning_rate": 6.562864927987545e-05, |
| "loss": 1.1097, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.6734674050325464, |
| "grad_norm": 0.20358797907829285, |
| "learning_rate": 6.55507979758661e-05, |
| "loss": 1.0241, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.6738560186534538, |
| "grad_norm": 0.21155016124248505, |
| "learning_rate": 6.547294667185676e-05, |
| "loss": 1.0235, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.6742446322743613, |
| "grad_norm": 0.198009192943573, |
| "learning_rate": 6.539509536784741e-05, |
| "loss": 0.9542, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6746332458952686, |
| "grad_norm": 0.20318005979061127, |
| "learning_rate": 6.531724406383807e-05, |
| "loss": 0.9993, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.675021859516176, |
| "grad_norm": 0.21384860575199127, |
| "learning_rate": 6.523939275982873e-05, |
| "loss": 1.1188, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.6754104731370835, |
| "grad_norm": 0.18736955523490906, |
| "learning_rate": 6.516154145581938e-05, |
| "loss": 0.9832, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.6757990867579908, |
| "grad_norm": 0.2002391368150711, |
| "learning_rate": 6.508369015181005e-05, |
| "loss": 1.0288, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.6761877003788983, |
| "grad_norm": 0.20011006295681, |
| "learning_rate": 6.50058388478007e-05, |
| "loss": 0.9588, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6765763139998057, |
| "grad_norm": 0.20782291889190674, |
| "learning_rate": 6.492798754379135e-05, |
| "loss": 1.0033, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.6769649276207131, |
| "grad_norm": 0.2056814581155777, |
| "learning_rate": 6.485013623978202e-05, |
| "loss": 1.0648, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.6773535412416205, |
| "grad_norm": 0.2207457572221756, |
| "learning_rate": 6.477228493577267e-05, |
| "loss": 1.0758, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.677742154862528, |
| "grad_norm": 0.20437198877334595, |
| "learning_rate": 6.469443363176334e-05, |
| "loss": 1.0253, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.6781307684834353, |
| "grad_norm": 0.198721781373024, |
| "learning_rate": 6.461658232775399e-05, |
| "loss": 1.0087, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6785193821043427, |
| "grad_norm": 0.22781015932559967, |
| "learning_rate": 6.453873102374464e-05, |
| "loss": 1.0692, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.6789079957252502, |
| "grad_norm": 0.21826857328414917, |
| "learning_rate": 6.446087971973531e-05, |
| "loss": 1.0232, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.6792966093461575, |
| "grad_norm": 0.2156928926706314, |
| "learning_rate": 6.438302841572596e-05, |
| "loss": 1.0686, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.679685222967065, |
| "grad_norm": 0.2161693125963211, |
| "learning_rate": 6.430517711171662e-05, |
| "loss": 1.0298, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.6800738365879724, |
| "grad_norm": 0.19139425456523895, |
| "learning_rate": 6.422732580770729e-05, |
| "loss": 0.9545, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6804624502088799, |
| "grad_norm": 0.22626161575317383, |
| "learning_rate": 6.414947450369794e-05, |
| "loss": 1.0669, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 0.2135801464319229, |
| "learning_rate": 6.407162319968861e-05, |
| "loss": 1.0187, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.6812396774506947, |
| "grad_norm": 0.20803681015968323, |
| "learning_rate": 6.399377189567926e-05, |
| "loss": 1.0856, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.681628291071602, |
| "grad_norm": 0.21317154169082642, |
| "learning_rate": 6.391592059166991e-05, |
| "loss": 1.1018, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.6820169046925094, |
| "grad_norm": 0.20877891778945923, |
| "learning_rate": 6.383806928766058e-05, |
| "loss": 1.0383, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.6824055183134169, |
| "grad_norm": 0.20769146084785461, |
| "learning_rate": 6.376021798365123e-05, |
| "loss": 1.0852, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.6827941319343243, |
| "grad_norm": 0.2252657413482666, |
| "learning_rate": 6.36823666796419e-05, |
| "loss": 1.0749, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.6831827455552317, |
| "grad_norm": 0.24453257024288177, |
| "learning_rate": 6.360451537563255e-05, |
| "loss": 1.1042, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.6835713591761391, |
| "grad_norm": 0.2082965075969696, |
| "learning_rate": 6.35266640716232e-05, |
| "loss": 1.0729, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.6839599727970466, |
| "grad_norm": 0.20121856033802032, |
| "learning_rate": 6.344881276761387e-05, |
| "loss": 1.038, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6843485864179539, |
| "grad_norm": 0.20096386969089508, |
| "learning_rate": 6.337096146360452e-05, |
| "loss": 0.9655, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.6847372000388614, |
| "grad_norm": 0.20015959441661835, |
| "learning_rate": 6.329311015959518e-05, |
| "loss": 1.0187, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.6851258136597688, |
| "grad_norm": 0.21056395769119263, |
| "learning_rate": 6.321525885558583e-05, |
| "loss": 1.0567, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.6855144272806762, |
| "grad_norm": 0.2211030125617981, |
| "learning_rate": 6.313740755157649e-05, |
| "loss": 1.0588, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.6859030409015836, |
| "grad_norm": 0.20809797942638397, |
| "learning_rate": 6.305955624756715e-05, |
| "loss": 0.9488, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.686291654522491, |
| "grad_norm": 0.2331530600786209, |
| "learning_rate": 6.29817049435578e-05, |
| "loss": 1.0789, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.6866802681433984, |
| "grad_norm": 0.21708674728870392, |
| "learning_rate": 6.290385363954846e-05, |
| "loss": 1.0518, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.6870688817643058, |
| "grad_norm": 0.2088184356689453, |
| "learning_rate": 6.282600233553912e-05, |
| "loss": 1.0178, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.6874574953852133, |
| "grad_norm": 0.20285943150520325, |
| "learning_rate": 6.274815103152977e-05, |
| "loss": 1.018, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.6878461090061206, |
| "grad_norm": 0.211436927318573, |
| "learning_rate": 6.267029972752044e-05, |
| "loss": 1.0572, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6882347226270281, |
| "grad_norm": 0.21108384430408478, |
| "learning_rate": 6.259244842351109e-05, |
| "loss": 1.0227, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.6886233362479355, |
| "grad_norm": 0.2060437649488449, |
| "learning_rate": 6.251459711950174e-05, |
| "loss": 1.0251, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.689011949868843, |
| "grad_norm": 0.20819245278835297, |
| "learning_rate": 6.243674581549241e-05, |
| "loss": 1.0643, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6894005634897503, |
| "grad_norm": 0.2172113060951233, |
| "learning_rate": 6.235889451148306e-05, |
| "loss": 1.0869, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6897891771106577, |
| "grad_norm": 0.2087356299161911, |
| "learning_rate": 6.228104320747373e-05, |
| "loss": 1.0622, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.6901777907315652, |
| "grad_norm": 0.1958473175764084, |
| "learning_rate": 6.220319190346439e-05, |
| "loss": 0.9542, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6905664043524725, |
| "grad_norm": 0.23630915582180023, |
| "learning_rate": 6.212534059945504e-05, |
| "loss": 1.0535, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.69095501797338, |
| "grad_norm": 0.2127649188041687, |
| "learning_rate": 6.204748929544571e-05, |
| "loss": 0.972, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.6913436315942874, |
| "grad_norm": 0.19873055815696716, |
| "learning_rate": 6.196963799143636e-05, |
| "loss": 0.9969, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.6917322452151948, |
| "grad_norm": 0.2013067901134491, |
| "learning_rate": 6.189178668742703e-05, |
| "loss": 1.0399, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6921208588361022, |
| "grad_norm": 0.21300987899303436, |
| "learning_rate": 6.181393538341768e-05, |
| "loss": 1.0377, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.6925094724570097, |
| "grad_norm": 0.21665994822978973, |
| "learning_rate": 6.173608407940833e-05, |
| "loss": 1.008, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.692898086077917, |
| "grad_norm": 0.21622590720653534, |
| "learning_rate": 6.1658232775399e-05, |
| "loss": 1.1128, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.6932866996988244, |
| "grad_norm": 0.2000272423028946, |
| "learning_rate": 6.158038147138965e-05, |
| "loss": 1.0115, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.6936753133197319, |
| "grad_norm": 0.20774856209754944, |
| "learning_rate": 6.15025301673803e-05, |
| "loss": 1.066, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.6940639269406392, |
| "grad_norm": 0.18497461080551147, |
| "learning_rate": 6.142467886337097e-05, |
| "loss": 0.9608, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.6944525405615467, |
| "grad_norm": 0.19819007813930511, |
| "learning_rate": 6.134682755936162e-05, |
| "loss": 1.0114, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.6948411541824541, |
| "grad_norm": 0.22013314068317413, |
| "learning_rate": 6.126897625535229e-05, |
| "loss": 0.976, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.6952297678033615, |
| "grad_norm": 0.2066160887479782, |
| "learning_rate": 6.119112495134294e-05, |
| "loss": 1.0585, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.6956183814242689, |
| "grad_norm": 0.21364475786685944, |
| "learning_rate": 6.111327364733359e-05, |
| "loss": 1.0842, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6960069950451764, |
| "grad_norm": 0.19731444120407104, |
| "learning_rate": 6.103542234332425e-05, |
| "loss": 0.9936, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.6963956086660837, |
| "grad_norm": 0.2162671983242035, |
| "learning_rate": 6.095757103931491e-05, |
| "loss": 1.0446, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.6967842222869911, |
| "grad_norm": 0.21486608684062958, |
| "learning_rate": 6.087971973530557e-05, |
| "loss": 1.0441, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.6971728359078986, |
| "grad_norm": 0.20850563049316406, |
| "learning_rate": 6.0801868431296224e-05, |
| "loss": 1.0431, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.6975614495288059, |
| "grad_norm": 0.20492027699947357, |
| "learning_rate": 6.072401712728688e-05, |
| "loss": 0.9845, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.6979500631497134, |
| "grad_norm": 0.1986648142337799, |
| "learning_rate": 6.064616582327754e-05, |
| "loss": 0.9855, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.6983386767706208, |
| "grad_norm": 0.20606310665607452, |
| "learning_rate": 6.05683145192682e-05, |
| "loss": 1.0608, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.6987272903915283, |
| "grad_norm": 0.20496073365211487, |
| "learning_rate": 6.0490463215258867e-05, |
| "loss": 1.0311, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.6991159040124356, |
| "grad_norm": 0.2153409719467163, |
| "learning_rate": 6.041261191124952e-05, |
| "loss": 1.0394, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.6995045176333431, |
| "grad_norm": 0.21410655975341797, |
| "learning_rate": 6.033476060724017e-05, |
| "loss": 1.0229, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6998931312542505, |
| "grad_norm": 0.20418782532215118, |
| "learning_rate": 6.0256909303230836e-05, |
| "loss": 1.0382, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.7002817448751578, |
| "grad_norm": 0.19154146313667297, |
| "learning_rate": 6.017905799922149e-05, |
| "loss": 0.9891, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.7006703584960653, |
| "grad_norm": 0.19138328731060028, |
| "learning_rate": 6.010120669521214e-05, |
| "loss": 0.9638, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.7010589721169727, |
| "grad_norm": 0.19704872369766235, |
| "learning_rate": 6.0023355391202806e-05, |
| "loss": 0.9835, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.7014475857378801, |
| "grad_norm": 0.2175600379705429, |
| "learning_rate": 5.994550408719346e-05, |
| "loss": 1.1192, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.7018361993587875, |
| "grad_norm": 0.21614274382591248, |
| "learning_rate": 5.9867652783184124e-05, |
| "loss": 1.0877, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.702224812979695, |
| "grad_norm": 0.20461414754390717, |
| "learning_rate": 5.9789801479174776e-05, |
| "loss": 0.9706, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.7026134266006023, |
| "grad_norm": 0.1989748477935791, |
| "learning_rate": 5.9711950175165434e-05, |
| "loss": 1.0004, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.7030020402215098, |
| "grad_norm": 0.21304792165756226, |
| "learning_rate": 5.963409887115609e-05, |
| "loss": 1.0177, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.7033906538424172, |
| "grad_norm": 0.19023855030536652, |
| "learning_rate": 5.955624756714675e-05, |
| "loss": 0.9759, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.7037792674633246, |
| "grad_norm": 0.21915188431739807, |
| "learning_rate": 5.947839626313742e-05, |
| "loss": 1.0621, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.704167881084232, |
| "grad_norm": 0.21626822650432587, |
| "learning_rate": 5.940054495912807e-05, |
| "loss": 1.0144, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.7045564947051394, |
| "grad_norm": 0.20742040872573853, |
| "learning_rate": 5.932269365511872e-05, |
| "loss": 0.9778, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.7049451083260468, |
| "grad_norm": 0.2172158658504486, |
| "learning_rate": 5.924484235110939e-05, |
| "loss": 1.0416, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.7053337219469542, |
| "grad_norm": 0.209465891122818, |
| "learning_rate": 5.916699104710004e-05, |
| "loss": 1.0378, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.7057223355678617, |
| "grad_norm": 0.2097882628440857, |
| "learning_rate": 5.9089139743090705e-05, |
| "loss": 1.0166, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.706110949188769, |
| "grad_norm": 0.2251904308795929, |
| "learning_rate": 5.901128843908136e-05, |
| "loss": 1.0783, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.7064995628096765, |
| "grad_norm": 0.1952916979789734, |
| "learning_rate": 5.893343713507201e-05, |
| "loss": 0.993, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.7068881764305839, |
| "grad_norm": 0.20997455716133118, |
| "learning_rate": 5.8855585831062675e-05, |
| "loss": 1.0448, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.7072767900514914, |
| "grad_norm": 0.20070020854473114, |
| "learning_rate": 5.877773452705333e-05, |
| "loss": 0.9603, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7076654036723987, |
| "grad_norm": 0.25765034556388855, |
| "learning_rate": 5.869988322304399e-05, |
| "loss": 1.0361, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.7080540172933061, |
| "grad_norm": 0.21948982775211334, |
| "learning_rate": 5.862203191903465e-05, |
| "loss": 1.0668, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.7084426309142136, |
| "grad_norm": 0.1867108792066574, |
| "learning_rate": 5.85441806150253e-05, |
| "loss": 0.9372, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.7088312445351209, |
| "grad_norm": 0.2037520408630371, |
| "learning_rate": 5.846632931101597e-05, |
| "loss": 0.9905, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "grad_norm": 0.21352072060108185, |
| "learning_rate": 5.838847800700662e-05, |
| "loss": 1.0514, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.7096084717769358, |
| "grad_norm": 0.1949845850467682, |
| "learning_rate": 5.831062670299727e-05, |
| "loss": 0.9636, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.7099970853978432, |
| "grad_norm": 0.2092294692993164, |
| "learning_rate": 5.823277539898794e-05, |
| "loss": 1.0361, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.7103856990187506, |
| "grad_norm": 0.20054267346858978, |
| "learning_rate": 5.815492409497859e-05, |
| "loss": 1.0195, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.7107743126396581, |
| "grad_norm": 0.2202107012271881, |
| "learning_rate": 5.8077072790969256e-05, |
| "loss": 1.0918, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.7111629262605654, |
| "grad_norm": 0.2001042366027832, |
| "learning_rate": 5.799922148695991e-05, |
| "loss": 1.0142, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.7115515398814728, |
| "grad_norm": 0.2102631777524948, |
| "learning_rate": 5.792137018295056e-05, |
| "loss": 1.0231, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.7119401535023803, |
| "grad_norm": 0.21717461943626404, |
| "learning_rate": 5.7843518878941226e-05, |
| "loss": 1.0295, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.7123287671232876, |
| "grad_norm": 0.2001933753490448, |
| "learning_rate": 5.776566757493188e-05, |
| "loss": 1.022, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.7127173807441951, |
| "grad_norm": 0.2218201756477356, |
| "learning_rate": 5.7687816270922544e-05, |
| "loss": 1.0762, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.7131059943651025, |
| "grad_norm": 0.20680001378059387, |
| "learning_rate": 5.76099649669132e-05, |
| "loss": 1.0017, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.7134946079860099, |
| "grad_norm": 0.21511508524417877, |
| "learning_rate": 5.7532113662903854e-05, |
| "loss": 1.048, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.7138832216069173, |
| "grad_norm": 0.19720061123371124, |
| "learning_rate": 5.745426235889452e-05, |
| "loss": 0.9983, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.7142718352278248, |
| "grad_norm": 0.2005409449338913, |
| "learning_rate": 5.737641105488517e-05, |
| "loss": 0.9941, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.7146604488487321, |
| "grad_norm": 0.2222924679517746, |
| "learning_rate": 5.729855975087584e-05, |
| "loss": 1.0476, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.7150490624696395, |
| "grad_norm": 0.21131208539009094, |
| "learning_rate": 5.722070844686649e-05, |
| "loss": 1.0208, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.715437676090547, |
| "grad_norm": 0.2307305932044983, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 0.9867, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.7158262897114543, |
| "grad_norm": 0.1974973827600479, |
| "learning_rate": 5.706500583884781e-05, |
| "loss": 1.0285, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.7162149033323618, |
| "grad_norm": 0.2006559520959854, |
| "learning_rate": 5.698715453483846e-05, |
| "loss": 1.024, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.7166035169532692, |
| "grad_norm": 0.21160584688186646, |
| "learning_rate": 5.690930323082911e-05, |
| "loss": 1.0256, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.7169921305741767, |
| "grad_norm": 0.28184664249420166, |
| "learning_rate": 5.683145192681978e-05, |
| "loss": 1.0443, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.717380744195084, |
| "grad_norm": 0.2206653356552124, |
| "learning_rate": 5.675360062281043e-05, |
| "loss": 1.0458, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.7177693578159915, |
| "grad_norm": 0.21346066892147064, |
| "learning_rate": 5.6675749318801095e-05, |
| "loss": 1.0106, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.7181579714368989, |
| "grad_norm": 0.20931747555732727, |
| "learning_rate": 5.6597898014791753e-05, |
| "loss": 0.9831, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.7185465850578063, |
| "grad_norm": 0.2026771456003189, |
| "learning_rate": 5.6520046710782406e-05, |
| "loss": 1.0162, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.7189351986787137, |
| "grad_norm": 0.21388716995716095, |
| "learning_rate": 5.644219540677307e-05, |
| "loss": 1.0867, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7193238122996211, |
| "grad_norm": 0.2039308398962021, |
| "learning_rate": 5.636434410276372e-05, |
| "loss": 1.0325, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.7197124259205285, |
| "grad_norm": 0.21741114556789398, |
| "learning_rate": 5.628649279875439e-05, |
| "loss": 1.0251, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.7201010395414359, |
| "grad_norm": 0.21343208849430084, |
| "learning_rate": 5.620864149474504e-05, |
| "loss": 1.0766, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.7204896531623434, |
| "grad_norm": 0.21712560951709747, |
| "learning_rate": 5.613079019073569e-05, |
| "loss": 1.0643, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.7208782667832507, |
| "grad_norm": 0.2176978886127472, |
| "learning_rate": 5.605293888672636e-05, |
| "loss": 1.0375, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.7212668804041582, |
| "grad_norm": 0.2065533846616745, |
| "learning_rate": 5.597508758271701e-05, |
| "loss": 1.0385, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.7216554940250656, |
| "grad_norm": 0.2169170081615448, |
| "learning_rate": 5.5897236278707676e-05, |
| "loss": 1.0197, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.722044107645973, |
| "grad_norm": 0.2047201544046402, |
| "learning_rate": 5.581938497469833e-05, |
| "loss": 0.9794, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.7224327212668804, |
| "grad_norm": 0.20898981392383575, |
| "learning_rate": 5.574153367068898e-05, |
| "loss": 1.032, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.7228213348877878, |
| "grad_norm": 0.2090533971786499, |
| "learning_rate": 5.5663682366679646e-05, |
| "loss": 1.0694, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7232099485086952, |
| "grad_norm": 0.21963149309158325, |
| "learning_rate": 5.5585831062670305e-05, |
| "loss": 1.0367, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.7235985621296026, |
| "grad_norm": 0.1974373459815979, |
| "learning_rate": 5.550797975866096e-05, |
| "loss": 1.0402, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.7239871757505101, |
| "grad_norm": 0.1924194097518921, |
| "learning_rate": 5.543012845465162e-05, |
| "loss": 0.9647, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.7243757893714174, |
| "grad_norm": 0.21366077661514282, |
| "learning_rate": 5.5352277150642274e-05, |
| "loss": 1.0139, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.7247644029923249, |
| "grad_norm": 0.21722929179668427, |
| "learning_rate": 5.527442584663294e-05, |
| "loss": 1.0366, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.7251530166132323, |
| "grad_norm": 0.20646587014198303, |
| "learning_rate": 5.519657454262359e-05, |
| "loss": 1.0465, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.7255416302341398, |
| "grad_norm": 0.19144394993782043, |
| "learning_rate": 5.5118723238614244e-05, |
| "loss": 0.9645, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.7259302438550471, |
| "grad_norm": 0.19553838670253754, |
| "learning_rate": 5.504087193460491e-05, |
| "loss": 0.98, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.7263188574759545, |
| "grad_norm": 0.21739792823791504, |
| "learning_rate": 5.496302063059556e-05, |
| "loss": 1.002, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.726707471096862, |
| "grad_norm": 0.1910562962293625, |
| "learning_rate": 5.488516932658623e-05, |
| "loss": 0.985, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.7270960847177693, |
| "grad_norm": 0.2133384346961975, |
| "learning_rate": 5.480731802257688e-05, |
| "loss": 1.0325, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.7274846983386768, |
| "grad_norm": 0.21884119510650635, |
| "learning_rate": 5.472946671856753e-05, |
| "loss": 1.0412, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.7278733119595842, |
| "grad_norm": 0.21069306135177612, |
| "learning_rate": 5.46516154145582e-05, |
| "loss": 1.0474, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.7282619255804916, |
| "grad_norm": 0.19266243278980255, |
| "learning_rate": 5.4573764110548856e-05, |
| "loss": 0.9941, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.728650539201399, |
| "grad_norm": 0.21255099773406982, |
| "learning_rate": 5.4495912806539515e-05, |
| "loss": 1.0211, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.7290391528223065, |
| "grad_norm": 0.1924402117729187, |
| "learning_rate": 5.4418061502530173e-05, |
| "loss": 1.0117, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.7294277664432138, |
| "grad_norm": 0.2019895315170288, |
| "learning_rate": 5.4340210198520825e-05, |
| "loss": 0.9921, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.7298163800641212, |
| "grad_norm": 0.20398026704788208, |
| "learning_rate": 5.426235889451149e-05, |
| "loss": 1.0423, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.7302049936850287, |
| "grad_norm": 0.20153217017650604, |
| "learning_rate": 5.418450759050214e-05, |
| "loss": 1.0333, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.730593607305936, |
| "grad_norm": 0.21259640157222748, |
| "learning_rate": 5.4106656286492795e-05, |
| "loss": 1.0689, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.7309822209268435, |
| "grad_norm": 0.2037276029586792, |
| "learning_rate": 5.402880498248346e-05, |
| "loss": 1.0203, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.7313708345477509, |
| "grad_norm": 0.19976729154586792, |
| "learning_rate": 5.395095367847411e-05, |
| "loss": 1.0173, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.7317594481686583, |
| "grad_norm": 0.20481806993484497, |
| "learning_rate": 5.387310237446478e-05, |
| "loss": 0.9864, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.7321480617895657, |
| "grad_norm": 0.21900932490825653, |
| "learning_rate": 5.379525107045543e-05, |
| "loss": 1.0519, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.7325366754104732, |
| "grad_norm": 0.200319305062294, |
| "learning_rate": 5.371739976644609e-05, |
| "loss": 1.0834, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.7329252890313805, |
| "grad_norm": 0.19662296772003174, |
| "learning_rate": 5.363954846243675e-05, |
| "loss": 0.9794, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.7333139026522879, |
| "grad_norm": 0.2113952785730362, |
| "learning_rate": 5.356169715842741e-05, |
| "loss": 1.0763, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.7337025162731954, |
| "grad_norm": 0.21348755061626434, |
| "learning_rate": 5.3483845854418066e-05, |
| "loss": 1.0781, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.7340911298941027, |
| "grad_norm": 0.20673702657222748, |
| "learning_rate": 5.3405994550408725e-05, |
| "loss": 1.0513, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.7344797435150102, |
| "grad_norm": 0.210855171084404, |
| "learning_rate": 5.332814324639938e-05, |
| "loss": 0.9972, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7348683571359176, |
| "grad_norm": 0.2136204093694687, |
| "learning_rate": 5.325029194239004e-05, |
| "loss": 1.03, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.7352569707568251, |
| "grad_norm": 0.20035260915756226, |
| "learning_rate": 5.3172440638380694e-05, |
| "loss": 0.9739, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.7356455843777324, |
| "grad_norm": 0.1943352371454239, |
| "learning_rate": 5.309458933437136e-05, |
| "loss": 0.9411, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.7360341979986399, |
| "grad_norm": 0.3994326889514923, |
| "learning_rate": 5.301673803036201e-05, |
| "loss": 1.0714, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.7364228116195473, |
| "grad_norm": 0.21691356599330902, |
| "learning_rate": 5.2938886726352664e-05, |
| "loss": 1.0648, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.7368114252404547, |
| "grad_norm": 0.19853095710277557, |
| "learning_rate": 5.286103542234333e-05, |
| "loss": 0.983, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.7372000388613621, |
| "grad_norm": 0.21836897730827332, |
| "learning_rate": 5.278318411833398e-05, |
| "loss": 1.0396, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.7375886524822695, |
| "grad_norm": 0.19596605002880096, |
| "learning_rate": 5.270533281432464e-05, |
| "loss": 0.9593, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.7379772661031769, |
| "grad_norm": 0.2141752541065216, |
| "learning_rate": 5.26274815103153e-05, |
| "loss": 1.0373, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.7383658797240843, |
| "grad_norm": 0.20552939176559448, |
| "learning_rate": 5.254963020630596e-05, |
| "loss": 1.0352, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7387544933449918, |
| "grad_norm": 0.2095794975757599, |
| "learning_rate": 5.247177890229662e-05, |
| "loss": 1.0632, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.7391431069658991, |
| "grad_norm": 0.19894710183143616, |
| "learning_rate": 5.2393927598287276e-05, |
| "loss": 0.9886, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.7395317205868066, |
| "grad_norm": 0.22996319830417633, |
| "learning_rate": 5.231607629427793e-05, |
| "loss": 1.0826, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.739920334207714, |
| "grad_norm": 0.21416957676410675, |
| "learning_rate": 5.2238224990268593e-05, |
| "loss": 1.0161, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.7403089478286214, |
| "grad_norm": 0.21819345653057098, |
| "learning_rate": 5.2160373686259245e-05, |
| "loss": 1.0458, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.7406975614495288, |
| "grad_norm": 0.21327044069766998, |
| "learning_rate": 5.208252238224991e-05, |
| "loss": 1.0721, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.7410861750704362, |
| "grad_norm": 0.21436645090579987, |
| "learning_rate": 5.200467107824056e-05, |
| "loss": 1.0743, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.7414747886913436, |
| "grad_norm": 0.215640127658844, |
| "learning_rate": 5.1926819774231215e-05, |
| "loss": 1.0274, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.741863402312251, |
| "grad_norm": 0.2043589949607849, |
| "learning_rate": 5.184896847022188e-05, |
| "loss": 1.0618, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.7422520159331585, |
| "grad_norm": 0.2014230340719223, |
| "learning_rate": 5.177111716621253e-05, |
| "loss": 0.9892, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7426406295540658, |
| "grad_norm": 0.19954468309879303, |
| "learning_rate": 5.16932658622032e-05, |
| "loss": 0.9815, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.7430292431749733, |
| "grad_norm": 0.23119708895683289, |
| "learning_rate": 5.161541455819385e-05, |
| "loss": 1.0783, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.7434178567958807, |
| "grad_norm": 0.20650482177734375, |
| "learning_rate": 5.153756325418451e-05, |
| "loss": 1.0162, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.7438064704167882, |
| "grad_norm": 0.20021970570087433, |
| "learning_rate": 5.145971195017517e-05, |
| "loss": 1.0062, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.7441950840376955, |
| "grad_norm": 0.23300811648368835, |
| "learning_rate": 5.138186064616583e-05, |
| "loss": 1.0049, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.7445836976586029, |
| "grad_norm": 0.23268327116966248, |
| "learning_rate": 5.130400934215648e-05, |
| "loss": 1.0138, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.7449723112795104, |
| "grad_norm": 0.20413407683372498, |
| "learning_rate": 5.1226158038147145e-05, |
| "loss": 0.9903, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.7453609249004177, |
| "grad_norm": 0.20714978873729706, |
| "learning_rate": 5.1148306734137797e-05, |
| "loss": 1.0374, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.7457495385213252, |
| "grad_norm": 0.2000850886106491, |
| "learning_rate": 5.107045543012846e-05, |
| "loss": 0.9885, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.7461381521422326, |
| "grad_norm": 0.2054719179868698, |
| "learning_rate": 5.0992604126119114e-05, |
| "loss": 1.0551, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.74652676576314, |
| "grad_norm": 0.2351357489824295, |
| "learning_rate": 5.0914752822109766e-05, |
| "loss": 1.0693, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.7469153793840474, |
| "grad_norm": 0.22370338439941406, |
| "learning_rate": 5.083690151810043e-05, |
| "loss": 0.9781, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.7473039930049549, |
| "grad_norm": 0.18734332919120789, |
| "learning_rate": 5.0759050214091084e-05, |
| "loss": 0.9329, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.7476926066258622, |
| "grad_norm": 0.22099906206130981, |
| "learning_rate": 5.068119891008175e-05, |
| "loss": 1.0498, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.7480812202467696, |
| "grad_norm": 0.20144490897655487, |
| "learning_rate": 5.06033476060724e-05, |
| "loss": 0.9865, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.7484698338676771, |
| "grad_norm": 0.21770039200782776, |
| "learning_rate": 5.052549630206306e-05, |
| "loss": 1.0867, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.7488584474885844, |
| "grad_norm": 0.19649921357631683, |
| "learning_rate": 5.044764499805372e-05, |
| "loss": 0.9887, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.7492470611094919, |
| "grad_norm": 0.1940620392560959, |
| "learning_rate": 5.036979369404438e-05, |
| "loss": 1.0073, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.7496356747303993, |
| "grad_norm": 0.20987650752067566, |
| "learning_rate": 5.0291942390035044e-05, |
| "loss": 1.046, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.7500242883513067, |
| "grad_norm": 0.2116398960351944, |
| "learning_rate": 5.0214091086025696e-05, |
| "loss": 1.0423, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7504129019722141, |
| "grad_norm": 0.18996965885162354, |
| "learning_rate": 5.013623978201635e-05, |
| "loss": 0.9822, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.7508015155931216, |
| "grad_norm": 0.20942547917366028, |
| "learning_rate": 5.005838847800701e-05, |
| "loss": 1.0472, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.751190129214029, |
| "grad_norm": 0.19006839394569397, |
| "learning_rate": 4.9980537173997665e-05, |
| "loss": 0.993, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.7515787428349364, |
| "grad_norm": 0.21508941054344177, |
| "learning_rate": 4.9902685869988324e-05, |
| "loss": 1.0406, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.7519673564558438, |
| "grad_norm": 0.1989334225654602, |
| "learning_rate": 4.982483456597898e-05, |
| "loss": 0.9997, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.7523559700767511, |
| "grad_norm": 0.19993600249290466, |
| "learning_rate": 4.974698326196964e-05, |
| "loss": 1.0139, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.7527445836976586, |
| "grad_norm": 0.20927831530570984, |
| "learning_rate": 4.9669131957960294e-05, |
| "loss": 0.995, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.753133197318566, |
| "grad_norm": 0.20963850617408752, |
| "learning_rate": 4.959128065395095e-05, |
| "loss": 1.0678, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.7535218109394735, |
| "grad_norm": 0.19523034989833832, |
| "learning_rate": 4.951342934994161e-05, |
| "loss": 0.9883, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.7539104245603808, |
| "grad_norm": 0.21588142216205597, |
| "learning_rate": 4.943557804593227e-05, |
| "loss": 1.0398, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7542990381812883, |
| "grad_norm": 0.19894704222679138, |
| "learning_rate": 4.935772674192293e-05, |
| "loss": 1.0125, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.7546876518021957, |
| "grad_norm": 0.2155168056488037, |
| "learning_rate": 4.927987543791359e-05, |
| "loss": 1.0447, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.7550762654231031, |
| "grad_norm": 0.212605819106102, |
| "learning_rate": 4.920202413390425e-05, |
| "loss": 1.077, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.7554648790440105, |
| "grad_norm": 0.2168148010969162, |
| "learning_rate": 4.9124172829894906e-05, |
| "loss": 1.0029, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.7558534926649179, |
| "grad_norm": 0.2020149528980255, |
| "learning_rate": 4.9046321525885565e-05, |
| "loss": 1.0684, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.7562421062858253, |
| "grad_norm": 0.21063408255577087, |
| "learning_rate": 4.8968470221876217e-05, |
| "loss": 1.0147, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.7566307199067327, |
| "grad_norm": 0.19599388539791107, |
| "learning_rate": 4.8890618917866875e-05, |
| "loss": 0.9719, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.7570193335276402, |
| "grad_norm": 0.2158602923154831, |
| "learning_rate": 4.8812767613857534e-05, |
| "loss": 1.0439, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.7574079471485475, |
| "grad_norm": 0.21013815701007843, |
| "learning_rate": 4.873491630984819e-05, |
| "loss": 1.0319, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.757796560769455, |
| "grad_norm": 0.2020798772573471, |
| "learning_rate": 4.8657065005838845e-05, |
| "loss": 1.0037, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7581851743903624, |
| "grad_norm": 0.21202047169208527, |
| "learning_rate": 4.8579213701829504e-05, |
| "loss": 0.9823, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.7585737880112698, |
| "grad_norm": 0.20750083029270172, |
| "learning_rate": 4.850136239782016e-05, |
| "loss": 1.0073, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.7589624016321772, |
| "grad_norm": 0.20938372611999512, |
| "learning_rate": 4.842351109381083e-05, |
| "loss": 1.0326, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.7593510152530846, |
| "grad_norm": 0.21984544396400452, |
| "learning_rate": 4.834565978980149e-05, |
| "loss": 1.0363, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.759739628873992, |
| "grad_norm": 0.20306189358234406, |
| "learning_rate": 4.826780848579214e-05, |
| "loss": 1.0374, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.7601282424948994, |
| "grad_norm": 0.20631705224514008, |
| "learning_rate": 4.81899571817828e-05, |
| "loss": 1.0985, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.7605168561158069, |
| "grad_norm": 0.22092190384864807, |
| "learning_rate": 4.811210587777346e-05, |
| "loss": 1.0216, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.7609054697367142, |
| "grad_norm": 0.21419481933116913, |
| "learning_rate": 4.8034254573764116e-05, |
| "loss": 1.0327, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.7612940833576217, |
| "grad_norm": 0.1954476237297058, |
| "learning_rate": 4.795640326975477e-05, |
| "loss": 1.0139, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.7616826969785291, |
| "grad_norm": 0.21092113852500916, |
| "learning_rate": 4.7878551965745427e-05, |
| "loss": 1.0934, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7620713105994366, |
| "grad_norm": 0.1998988837003708, |
| "learning_rate": 4.7800700661736085e-05, |
| "loss": 0.9782, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.7624599242203439, |
| "grad_norm": 0.20410674810409546, |
| "learning_rate": 4.7722849357726744e-05, |
| "loss": 1.0186, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.7628485378412513, |
| "grad_norm": 0.25312289595603943, |
| "learning_rate": 4.76449980537174e-05, |
| "loss": 1.0103, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.7632371514621588, |
| "grad_norm": 0.20648318529129028, |
| "learning_rate": 4.7567146749708055e-05, |
| "loss": 1.0314, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.7636257650830661, |
| "grad_norm": 0.20513702929019928, |
| "learning_rate": 4.7489295445698714e-05, |
| "loss": 0.981, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7640143787039736, |
| "grad_norm": 0.20063039660453796, |
| "learning_rate": 4.741144414168938e-05, |
| "loss": 1.0218, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.764402992324881, |
| "grad_norm": 0.20328521728515625, |
| "learning_rate": 4.733359283768004e-05, |
| "loss": 1.0614, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.7647916059457884, |
| "grad_norm": 0.2209623008966446, |
| "learning_rate": 4.725574153367069e-05, |
| "loss": 1.0478, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.7651802195666958, |
| "grad_norm": 0.2023559957742691, |
| "learning_rate": 4.717789022966135e-05, |
| "loss": 1.0455, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.7655688331876033, |
| "grad_norm": 0.20461297035217285, |
| "learning_rate": 4.710003892565201e-05, |
| "loss": 0.9427, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 0.2108335793018341, |
| "learning_rate": 4.702218762164267e-05, |
| "loss": 1.0344, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.766346060429418, |
| "grad_norm": 0.20883473753929138, |
| "learning_rate": 4.6944336317633326e-05, |
| "loss": 1.0336, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.7667346740503255, |
| "grad_norm": 0.20144741237163544, |
| "learning_rate": 4.686648501362398e-05, |
| "loss": 1.0101, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.7671232876712328, |
| "grad_norm": 0.21269328892230988, |
| "learning_rate": 4.6788633709614637e-05, |
| "loss": 0.9989, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.7675119012921403, |
| "grad_norm": 0.20673738420009613, |
| "learning_rate": 4.6710782405605295e-05, |
| "loss": 1.0235, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.7679005149130477, |
| "grad_norm": 0.1966594159603119, |
| "learning_rate": 4.6632931101595954e-05, |
| "loss": 1.0081, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.7682891285339551, |
| "grad_norm": 0.22186829149723053, |
| "learning_rate": 4.6555079797586606e-05, |
| "loss": 1.0081, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.7686777421548625, |
| "grad_norm": 0.20602557063102722, |
| "learning_rate": 4.6477228493577265e-05, |
| "loss": 1.0381, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.76906635577577, |
| "grad_norm": 0.19581305980682373, |
| "learning_rate": 4.639937718956793e-05, |
| "loss": 1.0196, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.7694549693966773, |
| "grad_norm": 0.20162086188793182, |
| "learning_rate": 4.632152588555859e-05, |
| "loss": 1.0168, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7698435830175848, |
| "grad_norm": 0.21967145800590515, |
| "learning_rate": 4.624367458154925e-05, |
| "loss": 1.0339, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.7702321966384922, |
| "grad_norm": 0.20245851576328278, |
| "learning_rate": 4.61658232775399e-05, |
| "loss": 1.0349, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.7706208102593995, |
| "grad_norm": 0.20409934222698212, |
| "learning_rate": 4.608797197353056e-05, |
| "loss": 1.0296, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.771009423880307, |
| "grad_norm": 0.19757163524627686, |
| "learning_rate": 4.601012066952122e-05, |
| "loss": 1.0443, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.7713980375012144, |
| "grad_norm": 0.20038221776485443, |
| "learning_rate": 4.593226936551188e-05, |
| "loss": 1.0431, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.7717866511221219, |
| "grad_norm": 0.2112458199262619, |
| "learning_rate": 4.585441806150253e-05, |
| "loss": 1.0553, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.7721752647430292, |
| "grad_norm": 0.21868042647838593, |
| "learning_rate": 4.577656675749319e-05, |
| "loss": 1.0061, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.7725638783639367, |
| "grad_norm": 0.22484582662582397, |
| "learning_rate": 4.5698715453483846e-05, |
| "loss": 1.0831, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.7729524919848441, |
| "grad_norm": 0.20265011489391327, |
| "learning_rate": 4.5620864149474505e-05, |
| "loss": 1.0206, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.7733411056057515, |
| "grad_norm": 0.2052810937166214, |
| "learning_rate": 4.5543012845465164e-05, |
| "loss": 1.0366, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7737297192266589, |
| "grad_norm": 0.21016088128089905, |
| "learning_rate": 4.546516154145582e-05, |
| "loss": 0.9963, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.7741183328475663, |
| "grad_norm": 0.19719412922859192, |
| "learning_rate": 4.538731023744648e-05, |
| "loss": 0.9853, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.7745069464684737, |
| "grad_norm": 0.20447245240211487, |
| "learning_rate": 4.530945893343714e-05, |
| "loss": 0.9977, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.7748955600893811, |
| "grad_norm": 0.21796588599681854, |
| "learning_rate": 4.52316076294278e-05, |
| "loss": 1.0949, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.7752841737102886, |
| "grad_norm": 0.2041284590959549, |
| "learning_rate": 4.515375632541845e-05, |
| "loss": 1.0034, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.7756727873311959, |
| "grad_norm": 0.21134726703166962, |
| "learning_rate": 4.507590502140911e-05, |
| "loss": 1.0076, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.7760614009521034, |
| "grad_norm": 0.20730996131896973, |
| "learning_rate": 4.499805371739977e-05, |
| "loss": 1.0456, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.7764500145730108, |
| "grad_norm": 0.22316931188106537, |
| "learning_rate": 4.492020241339043e-05, |
| "loss": 0.9418, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.7768386281939182, |
| "grad_norm": 0.21494819223880768, |
| "learning_rate": 4.484235110938109e-05, |
| "loss": 1.0597, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.7772272418148256, |
| "grad_norm": 0.20344491302967072, |
| "learning_rate": 4.476449980537174e-05, |
| "loss": 0.9749, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.777615855435733, |
| "grad_norm": 0.20816263556480408, |
| "learning_rate": 4.46866485013624e-05, |
| "loss": 1.0526, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.7780044690566404, |
| "grad_norm": 0.21490095555782318, |
| "learning_rate": 4.4608797197353056e-05, |
| "loss": 1.0311, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.7783930826775478, |
| "grad_norm": 0.2043679803609848, |
| "learning_rate": 4.4530945893343715e-05, |
| "loss": 1.0176, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.7787816962984553, |
| "grad_norm": 0.2015836238861084, |
| "learning_rate": 4.4453094589334374e-05, |
| "loss": 1.015, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.7791703099193626, |
| "grad_norm": 0.21843332052230835, |
| "learning_rate": 4.437524328532503e-05, |
| "loss": 1.0577, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.7795589235402701, |
| "grad_norm": 0.20447933673858643, |
| "learning_rate": 4.429739198131569e-05, |
| "loss": 1.0549, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.7799475371611775, |
| "grad_norm": 0.20317135751247406, |
| "learning_rate": 4.421954067730635e-05, |
| "loss": 1.0419, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.780336150782085, |
| "grad_norm": 0.20233985781669617, |
| "learning_rate": 4.414168937329701e-05, |
| "loss": 0.9743, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.7807247644029923, |
| "grad_norm": 0.1957770437002182, |
| "learning_rate": 4.406383806928766e-05, |
| "loss": 1.0306, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.7811133780238997, |
| "grad_norm": 0.2055465579032898, |
| "learning_rate": 4.398598676527832e-05, |
| "loss": 0.9917, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7815019916448072, |
| "grad_norm": 0.1980140060186386, |
| "learning_rate": 4.390813546126898e-05, |
| "loss": 1.0002, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.7818906052657145, |
| "grad_norm": 0.21538390219211578, |
| "learning_rate": 4.383028415725964e-05, |
| "loss": 0.9784, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.782279218886622, |
| "grad_norm": 0.20209911465644836, |
| "learning_rate": 4.375243285325029e-05, |
| "loss": 1.0403, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.7826678325075294, |
| "grad_norm": 0.22064533829689026, |
| "learning_rate": 4.367458154924095e-05, |
| "loss": 1.0816, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.7830564461284368, |
| "grad_norm": 0.21721522510051727, |
| "learning_rate": 4.359673024523161e-05, |
| "loss": 1.0215, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.7834450597493442, |
| "grad_norm": 0.21042165160179138, |
| "learning_rate": 4.3518878941222266e-05, |
| "loss": 0.9993, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.7838336733702517, |
| "grad_norm": 0.2821733355522156, |
| "learning_rate": 4.3441027637212925e-05, |
| "loss": 1.0337, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.784222286991159, |
| "grad_norm": 0.1997404247522354, |
| "learning_rate": 4.3363176333203584e-05, |
| "loss": 0.9635, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.7846109006120664, |
| "grad_norm": 0.21088410913944244, |
| "learning_rate": 4.328532502919424e-05, |
| "loss": 1.0809, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.7849995142329739, |
| "grad_norm": 0.22041834890842438, |
| "learning_rate": 4.32074737251849e-05, |
| "loss": 1.0553, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7853881278538812, |
| "grad_norm": 0.21541887521743774, |
| "learning_rate": 4.312962242117556e-05, |
| "loss": 1.0348, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.7857767414747887, |
| "grad_norm": 0.19423037767410278, |
| "learning_rate": 4.305177111716621e-05, |
| "loss": 0.9566, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.7861653550956961, |
| "grad_norm": 0.20975807309150696, |
| "learning_rate": 4.297391981315687e-05, |
| "loss": 0.9946, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.7865539687166035, |
| "grad_norm": 0.1911199390888214, |
| "learning_rate": 4.289606850914753e-05, |
| "loss": 0.9582, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.7869425823375109, |
| "grad_norm": 0.20895734429359436, |
| "learning_rate": 4.281821720513819e-05, |
| "loss": 1.02, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.7873311959584184, |
| "grad_norm": 0.19652803242206573, |
| "learning_rate": 4.274036590112885e-05, |
| "loss": 0.9919, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.7877198095793257, |
| "grad_norm": 0.21050991117954254, |
| "learning_rate": 4.26625145971195e-05, |
| "loss": 1.0363, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.7881084232002332, |
| "grad_norm": 0.18776053190231323, |
| "learning_rate": 4.258466329311016e-05, |
| "loss": 0.9747, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.7884970368211406, |
| "grad_norm": 0.20973272621631622, |
| "learning_rate": 4.250681198910082e-05, |
| "loss": 1.0457, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.788885650442048, |
| "grad_norm": 0.22028960287570953, |
| "learning_rate": 4.2428960685091476e-05, |
| "loss": 1.0769, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7892742640629554, |
| "grad_norm": 0.20541588962078094, |
| "learning_rate": 4.2351109381082135e-05, |
| "loss": 1.0456, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.7896628776838628, |
| "grad_norm": 0.19365350902080536, |
| "learning_rate": 4.2273258077072794e-05, |
| "loss": 0.9708, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.7900514913047703, |
| "grad_norm": 0.21286098659038544, |
| "learning_rate": 4.219540677306345e-05, |
| "loss": 1.0443, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.7904401049256776, |
| "grad_norm": 0.20527319610118866, |
| "learning_rate": 4.211755546905411e-05, |
| "loss": 1.0165, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.7908287185465851, |
| "grad_norm": 0.20962440967559814, |
| "learning_rate": 4.203970416504477e-05, |
| "loss": 1.0723, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.7912173321674925, |
| "grad_norm": 0.21032460033893585, |
| "learning_rate": 4.196185286103542e-05, |
| "loss": 1.0384, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.7916059457883999, |
| "grad_norm": 0.22122742235660553, |
| "learning_rate": 4.188400155702608e-05, |
| "loss": 1.0239, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.7919945594093073, |
| "grad_norm": 0.21430088579654694, |
| "learning_rate": 4.180615025301674e-05, |
| "loss": 1.0421, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.7923831730302147, |
| "grad_norm": 0.200826957821846, |
| "learning_rate": 4.17282989490074e-05, |
| "loss": 1.0403, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.7927717866511221, |
| "grad_norm": 0.1936146765947342, |
| "learning_rate": 4.165044764499805e-05, |
| "loss": 0.9901, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.7931604002720295, |
| "grad_norm": 0.21162614226341248, |
| "learning_rate": 4.157259634098871e-05, |
| "loss": 1.0809, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.793549013892937, |
| "grad_norm": 0.1934708207845688, |
| "learning_rate": 4.149474503697937e-05, |
| "loss": 0.996, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.7939376275138443, |
| "grad_norm": 0.19730836153030396, |
| "learning_rate": 4.141689373297003e-05, |
| "loss": 1.0116, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.7943262411347518, |
| "grad_norm": 0.19641950726509094, |
| "learning_rate": 4.1339042428960686e-05, |
| "loss": 1.0554, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.7947148547556592, |
| "grad_norm": 0.1926102489233017, |
| "learning_rate": 4.1261191124951345e-05, |
| "loss": 0.9244, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.7951034683765666, |
| "grad_norm": 0.20683708786964417, |
| "learning_rate": 4.1183339820942004e-05, |
| "loss": 1.0247, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.795492081997474, |
| "grad_norm": 0.21519975364208221, |
| "learning_rate": 4.110548851693266e-05, |
| "loss": 1.0364, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.7958806956183814, |
| "grad_norm": 0.19510744512081146, |
| "learning_rate": 4.102763721292332e-05, |
| "loss": 0.9807, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.7962693092392888, |
| "grad_norm": 0.21060147881507874, |
| "learning_rate": 4.094978590891398e-05, |
| "loss": 1.0007, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.7966579228601962, |
| "grad_norm": 0.19922667741775513, |
| "learning_rate": 4.087193460490463e-05, |
| "loss": 0.9953, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7970465364811037, |
| "grad_norm": 0.2217833250761032, |
| "learning_rate": 4.079408330089529e-05, |
| "loss": 1.0359, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.797435150102011, |
| "grad_norm": 0.2138615995645523, |
| "learning_rate": 4.071623199688595e-05, |
| "loss": 1.0473, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.7978237637229185, |
| "grad_norm": 0.20814841985702515, |
| "learning_rate": 4.063838069287661e-05, |
| "loss": 1.042, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.7982123773438259, |
| "grad_norm": 0.21378004550933838, |
| "learning_rate": 4.056052938886726e-05, |
| "loss": 1.0616, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.7986009909647334, |
| "grad_norm": 0.22064481675624847, |
| "learning_rate": 4.048267808485792e-05, |
| "loss": 1.063, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.7989896045856407, |
| "grad_norm": 0.21143454313278198, |
| "learning_rate": 4.040482678084858e-05, |
| "loss": 0.9999, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.7993782182065481, |
| "grad_norm": 0.2092997431755066, |
| "learning_rate": 4.032697547683924e-05, |
| "loss": 0.9958, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.7997668318274556, |
| "grad_norm": 0.2715415954589844, |
| "learning_rate": 4.0249124172829896e-05, |
| "loss": 0.9981, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.8001554454483629, |
| "grad_norm": 0.20481626689434052, |
| "learning_rate": 4.0171272868820555e-05, |
| "loss": 1.0187, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.8005440590692704, |
| "grad_norm": 0.2076139748096466, |
| "learning_rate": 4.0093421564811214e-05, |
| "loss": 1.0147, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.8009326726901778, |
| "grad_norm": 0.21985560655593872, |
| "learning_rate": 4.001557026080187e-05, |
| "loss": 1.0436, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.8013212863110852, |
| "grad_norm": 0.2088089883327484, |
| "learning_rate": 3.993771895679253e-05, |
| "loss": 1.067, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.8017098999319926, |
| "grad_norm": 0.23079900443553925, |
| "learning_rate": 3.9859867652783184e-05, |
| "loss": 1.0208, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.8020985135529001, |
| "grad_norm": 0.20904935896396637, |
| "learning_rate": 3.978201634877384e-05, |
| "loss": 1.0417, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.8024871271738074, |
| "grad_norm": 0.2027217298746109, |
| "learning_rate": 3.97041650447645e-05, |
| "loss": 1.0466, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.8028757407947149, |
| "grad_norm": 0.2080574333667755, |
| "learning_rate": 3.962631374075516e-05, |
| "loss": 1.0004, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.8032643544156223, |
| "grad_norm": 0.2076699584722519, |
| "learning_rate": 3.954846243674582e-05, |
| "loss": 1.0288, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.8036529680365296, |
| "grad_norm": 0.20526565611362457, |
| "learning_rate": 3.947061113273647e-05, |
| "loss": 0.9627, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.8040415816574371, |
| "grad_norm": 0.2086559236049652, |
| "learning_rate": 3.939275982872713e-05, |
| "loss": 1.057, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.8044301952783445, |
| "grad_norm": 0.21741564571857452, |
| "learning_rate": 3.931490852471779e-05, |
| "loss": 1.0575, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.804818808899252, |
| "grad_norm": 0.19239796698093414, |
| "learning_rate": 3.923705722070845e-05, |
| "loss": 1.0028, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.8052074225201593, |
| "grad_norm": 0.20606793463230133, |
| "learning_rate": 3.9159205916699106e-05, |
| "loss": 1.0305, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.8055960361410668, |
| "grad_norm": 0.2197132408618927, |
| "learning_rate": 3.9081354612689765e-05, |
| "loss": 1.0669, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.8059846497619741, |
| "grad_norm": 0.19510973989963531, |
| "learning_rate": 3.9003503308680424e-05, |
| "loss": 0.984, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.8063732633828816, |
| "grad_norm": 0.20135273039340973, |
| "learning_rate": 3.892565200467108e-05, |
| "loss": 1.0528, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.806761877003789, |
| "grad_norm": 0.20280520617961884, |
| "learning_rate": 3.884780070066174e-05, |
| "loss": 1.0185, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.8071504906246963, |
| "grad_norm": 0.21787187457084656, |
| "learning_rate": 3.8769949396652394e-05, |
| "loss": 1.0875, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.8075391042456038, |
| "grad_norm": 0.21521267294883728, |
| "learning_rate": 3.869209809264305e-05, |
| "loss": 1.0352, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.8079277178665112, |
| "grad_norm": 0.21675272285938263, |
| "learning_rate": 3.861424678863371e-05, |
| "loss": 1.0178, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.8083163314874187, |
| "grad_norm": 0.20301300287246704, |
| "learning_rate": 3.853639548462437e-05, |
| "loss": 1.042, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.808704945108326, |
| "grad_norm": 0.2025609016418457, |
| "learning_rate": 3.845854418061502e-05, |
| "loss": 1.0224, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.8090935587292335, |
| "grad_norm": 0.23724251985549927, |
| "learning_rate": 3.838069287660568e-05, |
| "loss": 1.0051, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.8094821723501409, |
| "grad_norm": 0.17473214864730835, |
| "learning_rate": 3.830284157259634e-05, |
| "loss": 0.9183, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.8098707859710483, |
| "grad_norm": 0.20575867593288422, |
| "learning_rate": 3.8224990268587e-05, |
| "loss": 1.0018, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.8102593995919557, |
| "grad_norm": 0.2054753601551056, |
| "learning_rate": 3.8147138964577664e-05, |
| "loss": 1.0326, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.8106480132128631, |
| "grad_norm": 0.22283188998699188, |
| "learning_rate": 3.8069287660568316e-05, |
| "loss": 1.0878, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.8110366268337705, |
| "grad_norm": 0.20678454637527466, |
| "learning_rate": 3.7991436356558975e-05, |
| "loss": 1.0382, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.8114252404546779, |
| "grad_norm": 0.22482691705226898, |
| "learning_rate": 3.7913585052549634e-05, |
| "loss": 1.0441, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.8118138540755854, |
| "grad_norm": 0.19913192093372345, |
| "learning_rate": 3.783573374854029e-05, |
| "loss": 0.9093, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.8122024676964927, |
| "grad_norm": 0.21512696146965027, |
| "learning_rate": 3.7757882444530945e-05, |
| "loss": 1.0589, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.8125910813174002, |
| "grad_norm": 0.20883330702781677, |
| "learning_rate": 3.7680031140521604e-05, |
| "loss": 0.9773, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.8129796949383076, |
| "grad_norm": 0.20254108309745789, |
| "learning_rate": 3.760217983651226e-05, |
| "loss": 1.0111, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.813368308559215, |
| "grad_norm": 0.22513622045516968, |
| "learning_rate": 3.752432853250292e-05, |
| "loss": 1.0471, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.8137569221801224, |
| "grad_norm": 0.20943938195705414, |
| "learning_rate": 3.744647722849358e-05, |
| "loss": 1.0261, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.8141455358010298, |
| "grad_norm": 0.19357722997665405, |
| "learning_rate": 3.736862592448423e-05, |
| "loss": 0.9891, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.8145341494219372, |
| "grad_norm": 0.20199090242385864, |
| "learning_rate": 3.729077462047489e-05, |
| "loss": 1.0017, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.8149227630428446, |
| "grad_norm": 0.22087882459163666, |
| "learning_rate": 3.721292331646556e-05, |
| "loss": 1.0176, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.8153113766637521, |
| "grad_norm": 0.19757211208343506, |
| "learning_rate": 3.7135072012456215e-05, |
| "loss": 0.9993, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.8156999902846594, |
| "grad_norm": 0.21485236287117004, |
| "learning_rate": 3.705722070844687e-05, |
| "loss": 1.0129, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.8160886039055669, |
| "grad_norm": 0.2095671445131302, |
| "learning_rate": 3.6979369404437526e-05, |
| "loss": 1.0576, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8164772175264743, |
| "grad_norm": 0.21392807364463806, |
| "learning_rate": 3.6901518100428185e-05, |
| "loss": 1.0666, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.8168658311473818, |
| "grad_norm": 0.23267820477485657, |
| "learning_rate": 3.6823666796418844e-05, |
| "loss": 1.0691, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.8172544447682891, |
| "grad_norm": 0.3778455853462219, |
| "learning_rate": 3.67458154924095e-05, |
| "loss": 1.057, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.8176430583891965, |
| "grad_norm": 0.21719984710216522, |
| "learning_rate": 3.6667964188400155e-05, |
| "loss": 1.0564, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.818031672010104, |
| "grad_norm": 0.19418101012706757, |
| "learning_rate": 3.6590112884390814e-05, |
| "loss": 1.0156, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.8184202856310113, |
| "grad_norm": 0.20592990517616272, |
| "learning_rate": 3.651226158038147e-05, |
| "loss": 1.026, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.8188088992519188, |
| "grad_norm": 0.21999908983707428, |
| "learning_rate": 3.643441027637213e-05, |
| "loss": 1.0575, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.8191975128728262, |
| "grad_norm": 0.2080504447221756, |
| "learning_rate": 3.635655897236278e-05, |
| "loss": 1.0236, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.8195861264937336, |
| "grad_norm": 0.20104867219924927, |
| "learning_rate": 3.627870766835344e-05, |
| "loss": 0.9626, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.819974740114641, |
| "grad_norm": 0.18993836641311646, |
| "learning_rate": 3.620085636434411e-05, |
| "loss": 0.983, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.8203633537355485, |
| "grad_norm": 0.18710492551326752, |
| "learning_rate": 3.6123005060334767e-05, |
| "loss": 0.9674, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.8207519673564558, |
| "grad_norm": 0.2117459774017334, |
| "learning_rate": 3.6045153756325425e-05, |
| "loss": 1.0263, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.8211405809773633, |
| "grad_norm": 0.2005959451198578, |
| "learning_rate": 3.596730245231608e-05, |
| "loss": 1.0405, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.8215291945982707, |
| "grad_norm": 0.21586982905864716, |
| "learning_rate": 3.5889451148306736e-05, |
| "loss": 0.9715, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.821917808219178, |
| "grad_norm": 0.2229696810245514, |
| "learning_rate": 3.5811599844297395e-05, |
| "loss": 1.0427, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.8223064218400855, |
| "grad_norm": 0.22296395897865295, |
| "learning_rate": 3.5733748540288054e-05, |
| "loss": 1.093, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.8226950354609929, |
| "grad_norm": 0.22912591695785522, |
| "learning_rate": 3.5655897236278706e-05, |
| "loss": 1.0821, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.8230836490819003, |
| "grad_norm": 0.19285057485103607, |
| "learning_rate": 3.5578045932269365e-05, |
| "loss": 0.9694, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.8234722627028077, |
| "grad_norm": 0.2150295525789261, |
| "learning_rate": 3.5500194628260024e-05, |
| "loss": 1.0277, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.8238608763237152, |
| "grad_norm": 0.20686036348342896, |
| "learning_rate": 3.542234332425068e-05, |
| "loss": 0.9946, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8242494899446225, |
| "grad_norm": 0.21742792427539825, |
| "learning_rate": 3.534449202024134e-05, |
| "loss": 1.0233, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.82463810356553, |
| "grad_norm": 0.2077355682849884, |
| "learning_rate": 3.526664071623199e-05, |
| "loss": 0.9918, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.8250267171864374, |
| "grad_norm": 0.2552899122238159, |
| "learning_rate": 3.518878941222266e-05, |
| "loss": 0.9648, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.8254153308073447, |
| "grad_norm": 0.21043844521045685, |
| "learning_rate": 3.511093810821332e-05, |
| "loss": 1.023, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.8258039444282522, |
| "grad_norm": 0.22360606491565704, |
| "learning_rate": 3.5033086804203977e-05, |
| "loss": 1.0862, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.8261925580491596, |
| "grad_norm": 0.20735731720924377, |
| "learning_rate": 3.495523550019463e-05, |
| "loss": 1.017, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.8265811716700671, |
| "grad_norm": 0.21998152136802673, |
| "learning_rate": 3.487738419618529e-05, |
| "loss": 1.0273, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.8269697852909744, |
| "grad_norm": 0.23547297716140747, |
| "learning_rate": 3.4799532892175946e-05, |
| "loss": 1.0353, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.8273583989118819, |
| "grad_norm": 0.20162945985794067, |
| "learning_rate": 3.4721681588166605e-05, |
| "loss": 1.0289, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.8277470125327893, |
| "grad_norm": 0.1959386169910431, |
| "learning_rate": 3.4643830284157264e-05, |
| "loss": 1.012, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.8281356261536967, |
| "grad_norm": 0.21625256538391113, |
| "learning_rate": 3.4565978980147916e-05, |
| "loss": 1.0718, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.8285242397746041, |
| "grad_norm": 0.2094646692276001, |
| "learning_rate": 3.4488127676138575e-05, |
| "loss": 1.0157, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.8289128533955115, |
| "grad_norm": 0.19329530000686646, |
| "learning_rate": 3.4410276372129234e-05, |
| "loss": 0.9652, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.8293014670164189, |
| "grad_norm": 0.19125741720199585, |
| "learning_rate": 3.433242506811989e-05, |
| "loss": 0.9964, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.8296900806373263, |
| "grad_norm": 0.1942203938961029, |
| "learning_rate": 3.425457376411055e-05, |
| "loss": 0.9795, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.8300786942582338, |
| "grad_norm": 0.2229314148426056, |
| "learning_rate": 3.417672246010121e-05, |
| "loss": 1.1052, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.8304673078791411, |
| "grad_norm": 0.2160118967294693, |
| "learning_rate": 3.409887115609187e-05, |
| "loss": 1.0263, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.8308559215000486, |
| "grad_norm": 0.2106090933084488, |
| "learning_rate": 3.402101985208253e-05, |
| "loss": 1.0151, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.831244535120956, |
| "grad_norm": 0.31897667050361633, |
| "learning_rate": 3.3943168548073187e-05, |
| "loss": 1.0122, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.8316331487418634, |
| "grad_norm": 0.20475897192955017, |
| "learning_rate": 3.386531724406384e-05, |
| "loss": 1.0239, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8320217623627708, |
| "grad_norm": 0.21326549351215363, |
| "learning_rate": 3.37874659400545e-05, |
| "loss": 1.05, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.8324103759836782, |
| "grad_norm": 0.2130986452102661, |
| "learning_rate": 3.3709614636045156e-05, |
| "loss": 0.9979, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.8327989896045856, |
| "grad_norm": 0.20519514381885529, |
| "learning_rate": 3.3631763332035815e-05, |
| "loss": 1.035, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.833187603225493, |
| "grad_norm": 0.21058332920074463, |
| "learning_rate": 3.355391202802647e-05, |
| "loss": 1.0509, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.8335762168464005, |
| "grad_norm": 0.20692919194698334, |
| "learning_rate": 3.3476060724017126e-05, |
| "loss": 1.0262, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.8339648304673078, |
| "grad_norm": 0.20325800776481628, |
| "learning_rate": 3.3398209420007785e-05, |
| "loss": 1.0352, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.8343534440882153, |
| "grad_norm": 0.18956026434898376, |
| "learning_rate": 3.3320358115998444e-05, |
| "loss": 0.9618, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.8347420577091227, |
| "grad_norm": 0.24605980515480042, |
| "learning_rate": 3.32425068119891e-05, |
| "loss": 0.9785, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.8351306713300302, |
| "grad_norm": 0.20649299025535583, |
| "learning_rate": 3.316465550797976e-05, |
| "loss": 1.0051, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.8355192849509375, |
| "grad_norm": 0.21091307699680328, |
| "learning_rate": 3.308680420397042e-05, |
| "loss": 1.0321, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.835907898571845, |
| "grad_norm": 0.20463331043720245, |
| "learning_rate": 3.300895289996108e-05, |
| "loss": 1.0103, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.8362965121927524, |
| "grad_norm": 0.1851118803024292, |
| "learning_rate": 3.293110159595174e-05, |
| "loss": 0.9193, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.8366851258136597, |
| "grad_norm": 0.22127285599708557, |
| "learning_rate": 3.285325029194239e-05, |
| "loss": 1.0593, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.8370737394345672, |
| "grad_norm": 0.2060239166021347, |
| "learning_rate": 3.277539898793305e-05, |
| "loss": 1.1002, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.8374623530554746, |
| "grad_norm": 0.20628675818443298, |
| "learning_rate": 3.269754768392371e-05, |
| "loss": 1.0449, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.837850966676382, |
| "grad_norm": 0.2015877068042755, |
| "learning_rate": 3.2619696379914366e-05, |
| "loss": 1.0007, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.8382395802972894, |
| "grad_norm": 0.26001277565956116, |
| "learning_rate": 3.2541845075905025e-05, |
| "loss": 1.0593, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.8386281939181969, |
| "grad_norm": 0.21557845175266266, |
| "learning_rate": 3.246399377189568e-05, |
| "loss": 1.0206, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.8390168075391042, |
| "grad_norm": 0.21529968082904816, |
| "learning_rate": 3.2386142467886336e-05, |
| "loss": 1.0648, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.8394054211600117, |
| "grad_norm": 0.22108668088912964, |
| "learning_rate": 3.2308291163876995e-05, |
| "loss": 1.0192, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8397940347809191, |
| "grad_norm": 0.20087426900863647, |
| "learning_rate": 3.2230439859867654e-05, |
| "loss": 0.9972, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.8401826484018264, |
| "grad_norm": 0.2194579839706421, |
| "learning_rate": 3.215258855585831e-05, |
| "loss": 1.0222, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.8405712620227339, |
| "grad_norm": 0.2581467926502228, |
| "learning_rate": 3.207473725184897e-05, |
| "loss": 1.0369, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.8409598756436413, |
| "grad_norm": 0.20566490292549133, |
| "learning_rate": 3.199688594783963e-05, |
| "loss": 1.0453, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.8413484892645487, |
| "grad_norm": 0.20137596130371094, |
| "learning_rate": 3.191903464383029e-05, |
| "loss": 1.0404, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.8417371028854561, |
| "grad_norm": 0.2136070281267166, |
| "learning_rate": 3.184118333982095e-05, |
| "loss": 0.998, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.8421257165063636, |
| "grad_norm": 0.2082609087228775, |
| "learning_rate": 3.17633320358116e-05, |
| "loss": 1.0617, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.842514330127271, |
| "grad_norm": 0.20818866789340973, |
| "learning_rate": 3.168548073180226e-05, |
| "loss": 0.9739, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.8429029437481784, |
| "grad_norm": 0.1998904049396515, |
| "learning_rate": 3.160762942779292e-05, |
| "loss": 0.9984, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.8432915573690858, |
| "grad_norm": 0.2000143975019455, |
| "learning_rate": 3.1529778123783576e-05, |
| "loss": 0.9975, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8436801709899932, |
| "grad_norm": 0.20654286444187164, |
| "learning_rate": 3.145192681977423e-05, |
| "loss": 1.0403, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.8440687846109006, |
| "grad_norm": 0.20888234674930573, |
| "learning_rate": 3.137407551576489e-05, |
| "loss": 1.0072, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.844457398231808, |
| "grad_norm": 0.20207738876342773, |
| "learning_rate": 3.1296224211755546e-05, |
| "loss": 1.0361, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.8448460118527155, |
| "grad_norm": 0.2032788097858429, |
| "learning_rate": 3.1218372907746205e-05, |
| "loss": 1.0179, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.8452346254736228, |
| "grad_norm": 0.22794555127620697, |
| "learning_rate": 3.1140521603736864e-05, |
| "loss": 1.0337, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.8456232390945303, |
| "grad_norm": 0.20593926310539246, |
| "learning_rate": 3.106267029972752e-05, |
| "loss": 1.0336, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.8460118527154377, |
| "grad_norm": 0.20535798370838165, |
| "learning_rate": 3.098481899571818e-05, |
| "loss": 1.0465, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.8464004663363451, |
| "grad_norm": 0.2055482417345047, |
| "learning_rate": 3.090696769170884e-05, |
| "loss": 1.0073, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.8467890799572525, |
| "grad_norm": 0.20908206701278687, |
| "learning_rate": 3.08291163876995e-05, |
| "loss": 1.0478, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.8471776935781599, |
| "grad_norm": 0.20747126638889313, |
| "learning_rate": 3.075126508369015e-05, |
| "loss": 1.0621, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8475663071990673, |
| "grad_norm": 0.28445661067962646, |
| "learning_rate": 3.067341377968081e-05, |
| "loss": 1.0546, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.8479549208199747, |
| "grad_norm": 0.1851411610841751, |
| "learning_rate": 3.059556247567147e-05, |
| "loss": 0.9759, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.8483435344408822, |
| "grad_norm": 0.1998148262500763, |
| "learning_rate": 3.051771117166213e-05, |
| "loss": 1.0138, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.8487321480617895, |
| "grad_norm": 0.20033158361911774, |
| "learning_rate": 3.0439859867652786e-05, |
| "loss": 1.0346, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.849120761682697, |
| "grad_norm": 0.1972794383764267, |
| "learning_rate": 3.036200856364344e-05, |
| "loss": 1.0476, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.8495093753036044, |
| "grad_norm": 0.23393818736076355, |
| "learning_rate": 3.02841572596341e-05, |
| "loss": 0.9738, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.8498979889245118, |
| "grad_norm": 0.1907467097043991, |
| "learning_rate": 3.020630595562476e-05, |
| "loss": 0.966, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.8502866025454192, |
| "grad_norm": 0.19281136989593506, |
| "learning_rate": 3.0128454651615418e-05, |
| "loss": 1.0016, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.8506752161663266, |
| "grad_norm": 0.2053443342447281, |
| "learning_rate": 3.005060334760607e-05, |
| "loss": 1.0659, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.2173933982849121, |
| "learning_rate": 2.997275204359673e-05, |
| "loss": 1.0137, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.8514524434081414, |
| "grad_norm": 0.22902634739875793, |
| "learning_rate": 2.9894900739587388e-05, |
| "loss": 1.0408, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.8518410570290489, |
| "grad_norm": 0.2113914042711258, |
| "learning_rate": 2.9817049435578047e-05, |
| "loss": 1.0769, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.8522296706499563, |
| "grad_norm": 0.20389114320278168, |
| "learning_rate": 2.973919813156871e-05, |
| "loss": 0.9835, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.8526182842708637, |
| "grad_norm": 0.2062385231256485, |
| "learning_rate": 2.966134682755936e-05, |
| "loss": 1.0397, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.8530068978917711, |
| "grad_norm": 0.20552967488765717, |
| "learning_rate": 2.958349552355002e-05, |
| "loss": 0.9949, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.8533955115126786, |
| "grad_norm": 0.1985877901315689, |
| "learning_rate": 2.950564421954068e-05, |
| "loss": 0.9909, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.8537841251335859, |
| "grad_norm": 0.20005984604358673, |
| "learning_rate": 2.9427792915531337e-05, |
| "loss": 0.9603, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.8541727387544934, |
| "grad_norm": 0.20039033889770508, |
| "learning_rate": 2.9349941611521996e-05, |
| "loss": 0.9832, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.8545613523754008, |
| "grad_norm": 0.19540533423423767, |
| "learning_rate": 2.927209030751265e-05, |
| "loss": 0.9563, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.8549499659963081, |
| "grad_norm": 0.21219204366207123, |
| "learning_rate": 2.919423900350331e-05, |
| "loss": 1.0914, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8553385796172156, |
| "grad_norm": 0.1871120035648346, |
| "learning_rate": 2.911638769949397e-05, |
| "loss": 0.9683, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.855727193238123, |
| "grad_norm": 0.2022469937801361, |
| "learning_rate": 2.9038536395484628e-05, |
| "loss": 1.0552, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.8561158068590304, |
| "grad_norm": 0.21184539794921875, |
| "learning_rate": 2.896068509147528e-05, |
| "loss": 1.0544, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.8565044204799378, |
| "grad_norm": 0.21650457382202148, |
| "learning_rate": 2.888283378746594e-05, |
| "loss": 1.0683, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.8568930341008453, |
| "grad_norm": 0.19166558980941772, |
| "learning_rate": 2.88049824834566e-05, |
| "loss": 0.9317, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.8572816477217526, |
| "grad_norm": 0.21191413700580597, |
| "learning_rate": 2.872713117944726e-05, |
| "loss": 0.9775, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.8576702613426601, |
| "grad_norm": 0.1949252486228943, |
| "learning_rate": 2.864927987543792e-05, |
| "loss": 0.9771, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.8580588749635675, |
| "grad_norm": 0.18980230391025543, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 0.9816, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.8584474885844748, |
| "grad_norm": 0.20371113717556, |
| "learning_rate": 2.849357726741923e-05, |
| "loss": 1.0269, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.8588361022053823, |
| "grad_norm": 0.2025761753320694, |
| "learning_rate": 2.841572596340989e-05, |
| "loss": 0.9169, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8592247158262897, |
| "grad_norm": 0.20668815076351166, |
| "learning_rate": 2.8337874659400547e-05, |
| "loss": 1.0409, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.8596133294471971, |
| "grad_norm": 0.19602157175540924, |
| "learning_rate": 2.8260023355391203e-05, |
| "loss": 0.9752, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.8600019430681045, |
| "grad_norm": 0.19047275185585022, |
| "learning_rate": 2.818217205138186e-05, |
| "loss": 0.9862, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.860390556689012, |
| "grad_norm": 0.20148906111717224, |
| "learning_rate": 2.810432074737252e-05, |
| "loss": 1.0339, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.8607791703099194, |
| "grad_norm": 0.19507504999637604, |
| "learning_rate": 2.802646944336318e-05, |
| "loss": 1.0452, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.8611677839308268, |
| "grad_norm": 0.22428153455257416, |
| "learning_rate": 2.7948618139353838e-05, |
| "loss": 1.0652, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.8615563975517342, |
| "grad_norm": 0.19588248431682587, |
| "learning_rate": 2.787076683534449e-05, |
| "loss": 0.9816, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.8619450111726416, |
| "grad_norm": 0.20823241770267487, |
| "learning_rate": 2.7792915531335152e-05, |
| "loss": 1.0239, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.862333624793549, |
| "grad_norm": 0.20268678665161133, |
| "learning_rate": 2.771506422732581e-05, |
| "loss": 1.0057, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.8627222384144564, |
| "grad_norm": 0.22147025167942047, |
| "learning_rate": 2.763721292331647e-05, |
| "loss": 1.0296, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8631108520353639, |
| "grad_norm": 0.2015751451253891, |
| "learning_rate": 2.7559361619307122e-05, |
| "loss": 0.9884, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.8634994656562712, |
| "grad_norm": 0.20846128463745117, |
| "learning_rate": 2.748151031529778e-05, |
| "loss": 1.032, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.8638880792771787, |
| "grad_norm": 0.212540403008461, |
| "learning_rate": 2.740365901128844e-05, |
| "loss": 1.0432, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.8642766928980861, |
| "grad_norm": 0.19588392972946167, |
| "learning_rate": 2.73258077072791e-05, |
| "loss": 1.0203, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.8646653065189935, |
| "grad_norm": 0.2195088416337967, |
| "learning_rate": 2.7247956403269757e-05, |
| "loss": 1.0415, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8650539201399009, |
| "grad_norm": 0.20950359106063843, |
| "learning_rate": 2.7170105099260413e-05, |
| "loss": 1.0114, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.8654425337608083, |
| "grad_norm": 0.23009665310382843, |
| "learning_rate": 2.709225379525107e-05, |
| "loss": 1.0018, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.8658311473817157, |
| "grad_norm": 0.19696195423603058, |
| "learning_rate": 2.701440249124173e-05, |
| "loss": 1.0135, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.8662197610026231, |
| "grad_norm": 0.2212006151676178, |
| "learning_rate": 2.693655118723239e-05, |
| "loss": 1.0557, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.8666083746235306, |
| "grad_norm": 0.21312370896339417, |
| "learning_rate": 2.6858699883223045e-05, |
| "loss": 1.0758, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8669969882444379, |
| "grad_norm": 0.21425843238830566, |
| "learning_rate": 2.6780848579213703e-05, |
| "loss": 1.0234, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.8673856018653454, |
| "grad_norm": 0.2145942598581314, |
| "learning_rate": 2.6702997275204362e-05, |
| "loss": 0.9831, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.8677742154862528, |
| "grad_norm": 0.20881056785583496, |
| "learning_rate": 2.662514597119502e-05, |
| "loss": 1.0255, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.8681628291071602, |
| "grad_norm": 0.19835254549980164, |
| "learning_rate": 2.654729466718568e-05, |
| "loss": 0.9868, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.8685514427280676, |
| "grad_norm": 0.21160255372524261, |
| "learning_rate": 2.6469443363176332e-05, |
| "loss": 1.0024, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.868940056348975, |
| "grad_norm": 0.2119852900505066, |
| "learning_rate": 2.639159205916699e-05, |
| "loss": 0.9886, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.8693286699698825, |
| "grad_norm": 0.2107681930065155, |
| "learning_rate": 2.631374075515765e-05, |
| "loss": 1.0311, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.8697172835907898, |
| "grad_norm": 0.2076905369758606, |
| "learning_rate": 2.623588945114831e-05, |
| "loss": 1.0217, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.8701058972116973, |
| "grad_norm": 0.20869198441505432, |
| "learning_rate": 2.6158038147138964e-05, |
| "loss": 0.9488, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.8704945108326047, |
| "grad_norm": 0.1986512839794159, |
| "learning_rate": 2.6080186843129623e-05, |
| "loss": 1.0216, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8708831244535121, |
| "grad_norm": 0.19954320788383484, |
| "learning_rate": 2.600233553912028e-05, |
| "loss": 0.988, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.8712717380744195, |
| "grad_norm": 0.22843138873577118, |
| "learning_rate": 2.592448423511094e-05, |
| "loss": 1.0979, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.871660351695327, |
| "grad_norm": 0.21942777931690216, |
| "learning_rate": 2.58466329311016e-05, |
| "loss": 1.0378, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.8720489653162343, |
| "grad_norm": 0.21504725515842438, |
| "learning_rate": 2.5768781627092255e-05, |
| "loss": 1.0628, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.8724375789371418, |
| "grad_norm": 0.21556456387043, |
| "learning_rate": 2.5690930323082913e-05, |
| "loss": 0.9943, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.8728261925580492, |
| "grad_norm": 0.2099362164735794, |
| "learning_rate": 2.5613079019073572e-05, |
| "loss": 1.0603, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.8732148061789565, |
| "grad_norm": 0.2027025669813156, |
| "learning_rate": 2.553522771506423e-05, |
| "loss": 1.0028, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.873603419799864, |
| "grad_norm": 0.2144668847322464, |
| "learning_rate": 2.5457376411054883e-05, |
| "loss": 1.0462, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.8739920334207714, |
| "grad_norm": 0.20712412893772125, |
| "learning_rate": 2.5379525107045542e-05, |
| "loss": 0.9842, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.8743806470416788, |
| "grad_norm": 0.19471199810504913, |
| "learning_rate": 2.53016738030362e-05, |
| "loss": 1.0171, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8747692606625862, |
| "grad_norm": 0.19841787219047546, |
| "learning_rate": 2.522382249902686e-05, |
| "loss": 0.9034, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.8751578742834937, |
| "grad_norm": 0.20370744168758392, |
| "learning_rate": 2.5145971195017522e-05, |
| "loss": 1.0249, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.875546487904401, |
| "grad_norm": 0.22168315947055817, |
| "learning_rate": 2.5068119891008174e-05, |
| "loss": 1.0624, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.8759351015253085, |
| "grad_norm": 0.200806125998497, |
| "learning_rate": 2.4990268586998833e-05, |
| "loss": 1.0452, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.8763237151462159, |
| "grad_norm": 0.19972844421863556, |
| "learning_rate": 2.491241728298949e-05, |
| "loss": 1.0563, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.8767123287671232, |
| "grad_norm": 0.19919687509536743, |
| "learning_rate": 2.4834565978980147e-05, |
| "loss": 1.0249, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.8771009423880307, |
| "grad_norm": 0.19924059510231018, |
| "learning_rate": 2.4756714674970806e-05, |
| "loss": 1.016, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.8774895560089381, |
| "grad_norm": 0.2038920521736145, |
| "learning_rate": 2.4678863370961465e-05, |
| "loss": 1.0116, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.8778781696298456, |
| "grad_norm": 0.20609620213508606, |
| "learning_rate": 2.4601012066952123e-05, |
| "loss": 1.0153, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.8782667832507529, |
| "grad_norm": 0.20705272257328033, |
| "learning_rate": 2.4523160762942782e-05, |
| "loss": 1.013, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8786553968716604, |
| "grad_norm": 0.19973833858966827, |
| "learning_rate": 2.4445309458933438e-05, |
| "loss": 0.9932, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.8790440104925678, |
| "grad_norm": 0.20942817628383636, |
| "learning_rate": 2.4367458154924097e-05, |
| "loss": 1.0091, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.8794326241134752, |
| "grad_norm": 0.3686840236186981, |
| "learning_rate": 2.4289606850914752e-05, |
| "loss": 1.0157, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.8798212377343826, |
| "grad_norm": 0.20390458405017853, |
| "learning_rate": 2.4211755546905414e-05, |
| "loss": 1.0431, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.88020985135529, |
| "grad_norm": 0.2211003601551056, |
| "learning_rate": 2.413390424289607e-05, |
| "loss": 1.089, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.8805984649761974, |
| "grad_norm": 0.20558148622512817, |
| "learning_rate": 2.405605293888673e-05, |
| "loss": 0.9798, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.8809870785971048, |
| "grad_norm": 0.19347704946994781, |
| "learning_rate": 2.3978201634877384e-05, |
| "loss": 0.97, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.8813756922180123, |
| "grad_norm": 0.19454139471054077, |
| "learning_rate": 2.3900350330868043e-05, |
| "loss": 1.0265, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.8817643058389196, |
| "grad_norm": 0.19511118531227112, |
| "learning_rate": 2.38224990268587e-05, |
| "loss": 0.994, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.8821529194598271, |
| "grad_norm": 0.19948701560497284, |
| "learning_rate": 2.3744647722849357e-05, |
| "loss": 0.9911, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.8825415330807345, |
| "grad_norm": 0.21110126376152039, |
| "learning_rate": 2.366679641884002e-05, |
| "loss": 1.0484, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.8829301467016419, |
| "grad_norm": 0.20160740613937378, |
| "learning_rate": 2.3588945114830675e-05, |
| "loss": 0.9934, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.8833187603225493, |
| "grad_norm": 0.20967216789722443, |
| "learning_rate": 2.3511093810821333e-05, |
| "loss": 1.0081, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.8837073739434567, |
| "grad_norm": 0.1981070339679718, |
| "learning_rate": 2.343324250681199e-05, |
| "loss": 1.0093, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.8840959875643641, |
| "grad_norm": 0.21609579026699066, |
| "learning_rate": 2.3355391202802648e-05, |
| "loss": 1.0954, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.8844846011852715, |
| "grad_norm": 0.18667754530906677, |
| "learning_rate": 2.3277539898793303e-05, |
| "loss": 0.9833, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.884873214806179, |
| "grad_norm": 0.2127734273672104, |
| "learning_rate": 2.3199688594783965e-05, |
| "loss": 1.0508, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.8852618284270863, |
| "grad_norm": 0.2117089331150055, |
| "learning_rate": 2.3121837290774624e-05, |
| "loss": 1.0557, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.8856504420479938, |
| "grad_norm": 0.21022644639015198, |
| "learning_rate": 2.304398598676528e-05, |
| "loss": 1.0297, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.8860390556689012, |
| "grad_norm": 0.19904713332653046, |
| "learning_rate": 2.296613468275594e-05, |
| "loss": 0.9693, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8864276692898087, |
| "grad_norm": 0.23006491363048553, |
| "learning_rate": 2.2888283378746594e-05, |
| "loss": 1.0409, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.886816282910716, |
| "grad_norm": 0.2179296761751175, |
| "learning_rate": 2.2810432074737253e-05, |
| "loss": 1.0433, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.8872048965316235, |
| "grad_norm": 0.19764657318592072, |
| "learning_rate": 2.273258077072791e-05, |
| "loss": 0.9807, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.8875935101525309, |
| "grad_norm": 0.23379875719547272, |
| "learning_rate": 2.265472946671857e-05, |
| "loss": 1.1025, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.8879821237734382, |
| "grad_norm": 0.2069517821073532, |
| "learning_rate": 2.2576878162709226e-05, |
| "loss": 1.0466, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.8883707373943457, |
| "grad_norm": 0.22321875393390656, |
| "learning_rate": 2.2499026858699885e-05, |
| "loss": 1.0548, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.888759351015253, |
| "grad_norm": 0.2070666253566742, |
| "learning_rate": 2.2421175554690543e-05, |
| "loss": 1.0168, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.8891479646361605, |
| "grad_norm": 0.1939924657344818, |
| "learning_rate": 2.23433242506812e-05, |
| "loss": 1.0008, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.8895365782570679, |
| "grad_norm": 0.22350658476352692, |
| "learning_rate": 2.2265472946671858e-05, |
| "loss": 1.0469, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.8899251918779754, |
| "grad_norm": 0.19934551417827606, |
| "learning_rate": 2.2187621642662516e-05, |
| "loss": 0.977, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.8903138054988827, |
| "grad_norm": 0.22848142683506012, |
| "learning_rate": 2.2109770338653175e-05, |
| "loss": 1.0642, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.8907024191197902, |
| "grad_norm": 0.20296107232570648, |
| "learning_rate": 2.203191903464383e-05, |
| "loss": 1.0332, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.8910910327406976, |
| "grad_norm": 0.19952169060707092, |
| "learning_rate": 2.195406773063449e-05, |
| "loss": 1.0249, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.8914796463616049, |
| "grad_norm": 0.22449292242527008, |
| "learning_rate": 2.1876216426625145e-05, |
| "loss": 1.0572, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.8918682599825124, |
| "grad_norm": 0.20287659764289856, |
| "learning_rate": 2.1798365122615804e-05, |
| "loss": 1.0331, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.8922568736034198, |
| "grad_norm": 0.2029801905155182, |
| "learning_rate": 2.1720513818606463e-05, |
| "loss": 1.0326, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.8926454872243272, |
| "grad_norm": 0.21909672021865845, |
| "learning_rate": 2.164266251459712e-05, |
| "loss": 1.0903, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.8930341008452346, |
| "grad_norm": 0.21067824959754944, |
| "learning_rate": 2.156481121058778e-05, |
| "loss": 1.0425, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.8934227144661421, |
| "grad_norm": 0.20612956583499908, |
| "learning_rate": 2.1486959906578436e-05, |
| "loss": 1.0269, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.8938113280870494, |
| "grad_norm": 0.22750885784626007, |
| "learning_rate": 2.1409108602569095e-05, |
| "loss": 1.081, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8941999417079569, |
| "grad_norm": 0.2192569077014923, |
| "learning_rate": 2.133125729855975e-05, |
| "loss": 1.0305, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.8945885553288643, |
| "grad_norm": 0.2150728702545166, |
| "learning_rate": 2.125340599455041e-05, |
| "loss": 1.0369, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.8949771689497716, |
| "grad_norm": 0.2095833718776703, |
| "learning_rate": 2.1175554690541068e-05, |
| "loss": 1.0392, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.8953657825706791, |
| "grad_norm": 0.2074289619922638, |
| "learning_rate": 2.1097703386531726e-05, |
| "loss": 0.9893, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.8957543961915865, |
| "grad_norm": 0.20826508104801178, |
| "learning_rate": 2.1019852082522385e-05, |
| "loss": 1.0737, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.896143009812494, |
| "grad_norm": 0.20254862308502197, |
| "learning_rate": 2.094200077851304e-05, |
| "loss": 1.0251, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.8965316234334013, |
| "grad_norm": 0.20950356125831604, |
| "learning_rate": 2.08641494745037e-05, |
| "loss": 1.026, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.8969202370543088, |
| "grad_norm": 0.20761284232139587, |
| "learning_rate": 2.0786298170494355e-05, |
| "loss": 1.0556, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.8973088506752162, |
| "grad_norm": 0.1943255513906479, |
| "learning_rate": 2.0708446866485014e-05, |
| "loss": 0.9745, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.8976974642961236, |
| "grad_norm": 0.19723530113697052, |
| "learning_rate": 2.0630595562475673e-05, |
| "loss": 0.9764, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.898086077917031, |
| "grad_norm": 0.21135687828063965, |
| "learning_rate": 2.055274425846633e-05, |
| "loss": 1.0289, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.8984746915379384, |
| "grad_norm": 0.20867012441158295, |
| "learning_rate": 2.047489295445699e-05, |
| "loss": 1.0659, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.8988633051588458, |
| "grad_norm": 0.1999632567167282, |
| "learning_rate": 2.0397041650447646e-05, |
| "loss": 0.9699, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.8992519187797532, |
| "grad_norm": 0.2080952674150467, |
| "learning_rate": 2.0319190346438305e-05, |
| "loss": 1.0097, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.8996405324006607, |
| "grad_norm": 0.20419847965240479, |
| "learning_rate": 2.024133904242896e-05, |
| "loss": 1.0272, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.900029146021568, |
| "grad_norm": 0.19433575868606567, |
| "learning_rate": 2.016348773841962e-05, |
| "loss": 0.9892, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.9004177596424755, |
| "grad_norm": 0.20644325017929077, |
| "learning_rate": 2.0085636434410278e-05, |
| "loss": 0.9978, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.9008063732633829, |
| "grad_norm": 0.2145605981349945, |
| "learning_rate": 2.0007785130400936e-05, |
| "loss": 1.0569, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.9011949868842903, |
| "grad_norm": 0.2073410153388977, |
| "learning_rate": 1.9929933826391592e-05, |
| "loss": 1.0937, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.9015836005051977, |
| "grad_norm": 0.2169773280620575, |
| "learning_rate": 1.985208252238225e-05, |
| "loss": 1.0559, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.9019722141261051, |
| "grad_norm": 0.2153279334306717, |
| "learning_rate": 1.977423121837291e-05, |
| "loss": 1.074, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.9023608277470125, |
| "grad_norm": 0.2089853584766388, |
| "learning_rate": 1.9696379914363565e-05, |
| "loss": 0.9971, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.9027494413679199, |
| "grad_norm": 0.21813471615314484, |
| "learning_rate": 1.9618528610354224e-05, |
| "loss": 1.0408, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.9031380549888274, |
| "grad_norm": 0.19753578305244446, |
| "learning_rate": 1.9540677306344883e-05, |
| "loss": 0.9429, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.9035266686097347, |
| "grad_norm": 0.19760333001613617, |
| "learning_rate": 1.946282600233554e-05, |
| "loss": 1.0127, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.9039152822306422, |
| "grad_norm": 0.21375150978565216, |
| "learning_rate": 1.9384974698326197e-05, |
| "loss": 1.0166, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.9043038958515496, |
| "grad_norm": 0.21019572019577026, |
| "learning_rate": 1.9307123394316856e-05, |
| "loss": 0.9897, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.904692509472457, |
| "grad_norm": 0.20336006581783295, |
| "learning_rate": 1.922927209030751e-05, |
| "loss": 0.9788, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.9050811230933644, |
| "grad_norm": 0.20877422392368317, |
| "learning_rate": 1.915142078629817e-05, |
| "loss": 1.0257, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.9054697367142719, |
| "grad_norm": 0.21499283611774445, |
| "learning_rate": 1.9073569482288832e-05, |
| "loss": 1.0628, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.9058583503351793, |
| "grad_norm": 0.2943152189254761, |
| "learning_rate": 1.8995718178279488e-05, |
| "loss": 1.0859, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.9062469639560866, |
| "grad_norm": 0.20630142092704773, |
| "learning_rate": 1.8917866874270146e-05, |
| "loss": 1.0625, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.9066355775769941, |
| "grad_norm": 0.19609740376472473, |
| "learning_rate": 1.8840015570260802e-05, |
| "loss": 1.0043, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.9070241911979015, |
| "grad_norm": 0.21231451630592346, |
| "learning_rate": 1.876216426625146e-05, |
| "loss": 1.0534, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.9074128048188089, |
| "grad_norm": 0.2212425172328949, |
| "learning_rate": 1.8684312962242116e-05, |
| "loss": 0.99, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.9078014184397163, |
| "grad_norm": 0.21141575276851654, |
| "learning_rate": 1.860646165823278e-05, |
| "loss": 1.0442, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.9081900320606238, |
| "grad_norm": 0.20657780766487122, |
| "learning_rate": 1.8528610354223434e-05, |
| "loss": 1.0363, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.9085786456815311, |
| "grad_norm": 0.1973218023777008, |
| "learning_rate": 1.8450759050214093e-05, |
| "loss": 0.9868, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.9089672593024386, |
| "grad_norm": 0.19639235734939575, |
| "learning_rate": 1.837290774620475e-05, |
| "loss": 0.9865, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.909355872923346, |
| "grad_norm": 0.194901704788208, |
| "learning_rate": 1.8295056442195407e-05, |
| "loss": 0.9776, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.9097444865442533, |
| "grad_norm": 0.1907500922679901, |
| "learning_rate": 1.8217205138186066e-05, |
| "loss": 1.0048, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.9101331001651608, |
| "grad_norm": 0.20842313766479492, |
| "learning_rate": 1.813935383417672e-05, |
| "loss": 0.9773, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.9105217137860682, |
| "grad_norm": 0.2537369132041931, |
| "learning_rate": 1.8061502530167383e-05, |
| "loss": 0.9932, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.9109103274069756, |
| "grad_norm": 0.22774042189121246, |
| "learning_rate": 1.798365122615804e-05, |
| "loss": 1.1521, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.911298941027883, |
| "grad_norm": 0.192257359623909, |
| "learning_rate": 1.7905799922148698e-05, |
| "loss": 0.9707, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.9116875546487905, |
| "grad_norm": 0.21573100984096527, |
| "learning_rate": 1.7827948618139353e-05, |
| "loss": 1.0355, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.9120761682696978, |
| "grad_norm": 0.215474933385849, |
| "learning_rate": 1.7750097314130012e-05, |
| "loss": 1.0408, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.9124647818906053, |
| "grad_norm": 0.2031407654285431, |
| "learning_rate": 1.767224601012067e-05, |
| "loss": 1.0429, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.9128533955115127, |
| "grad_norm": 0.20461305975914001, |
| "learning_rate": 1.759439470611133e-05, |
| "loss": 1.0033, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.91324200913242, |
| "grad_norm": 0.20995965600013733, |
| "learning_rate": 1.7516543402101988e-05, |
| "loss": 1.089, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.9136306227533275, |
| "grad_norm": 0.20464631915092468, |
| "learning_rate": 1.7438692098092644e-05, |
| "loss": 1.0438, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.9140192363742349, |
| "grad_norm": 0.20657162368297577, |
| "learning_rate": 1.7360840794083303e-05, |
| "loss": 1.0687, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.9144078499951424, |
| "grad_norm": 0.20419646799564362, |
| "learning_rate": 1.7282989490073958e-05, |
| "loss": 1.0412, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.9147964636160497, |
| "grad_norm": 0.20655421912670135, |
| "learning_rate": 1.7205138186064617e-05, |
| "loss": 1.0343, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.9151850772369572, |
| "grad_norm": 0.20393185317516327, |
| "learning_rate": 1.7127286882055276e-05, |
| "loss": 1.0379, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.9155736908578646, |
| "grad_norm": 0.20768289268016815, |
| "learning_rate": 1.7049435578045934e-05, |
| "loss": 1.022, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.915962304478772, |
| "grad_norm": 0.2257547676563263, |
| "learning_rate": 1.6971584274036593e-05, |
| "loss": 1.1081, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.9163509180996794, |
| "grad_norm": 0.1980145126581192, |
| "learning_rate": 1.689373297002725e-05, |
| "loss": 1.0439, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.9167395317205868, |
| "grad_norm": 0.20351259410381317, |
| "learning_rate": 1.6815881666017908e-05, |
| "loss": 1.0363, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.9171281453414942, |
| "grad_norm": 0.20830631256103516, |
| "learning_rate": 1.6738030362008563e-05, |
| "loss": 1.0467, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.9175167589624016, |
| "grad_norm": 0.21225905418395996, |
| "learning_rate": 1.6660179057999222e-05, |
| "loss": 1.0611, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.9179053725833091, |
| "grad_norm": 0.20069880783557892, |
| "learning_rate": 1.658232775398988e-05, |
| "loss": 0.9989, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.9182939862042164, |
| "grad_norm": 0.21674825251102448, |
| "learning_rate": 1.650447644998054e-05, |
| "loss": 1.0578, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.9186825998251239, |
| "grad_norm": 0.20438091456890106, |
| "learning_rate": 1.6426625145971195e-05, |
| "loss": 1.0593, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.9190712134460313, |
| "grad_norm": 0.2195381075143814, |
| "learning_rate": 1.6348773841961854e-05, |
| "loss": 1.0354, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.9194598270669387, |
| "grad_norm": 0.21371111273765564, |
| "learning_rate": 1.6270922537952513e-05, |
| "loss": 0.9911, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.9198484406878461, |
| "grad_norm": 0.22097980976104736, |
| "learning_rate": 1.6193071233943168e-05, |
| "loss": 1.0064, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.9202370543087536, |
| "grad_norm": 0.20589159429073334, |
| "learning_rate": 1.6115219929933827e-05, |
| "loss": 1.0173, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.9206256679296609, |
| "grad_norm": 0.19218075275421143, |
| "learning_rate": 1.6037368625924486e-05, |
| "loss": 1.0215, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.9210142815505683, |
| "grad_norm": 0.2132728099822998, |
| "learning_rate": 1.5959517321915144e-05, |
| "loss": 1.0493, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.9214028951714758, |
| "grad_norm": 0.20006981492042542, |
| "learning_rate": 1.58816660179058e-05, |
| "loss": 0.9814, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.9217915087923831, |
| "grad_norm": 0.21600167453289032, |
| "learning_rate": 1.580381471389646e-05, |
| "loss": 1.0759, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.9221801224132906, |
| "grad_norm": 0.21474605798721313, |
| "learning_rate": 1.5725963409887114e-05, |
| "loss": 1.0411, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.922568736034198, |
| "grad_norm": 0.2044600546360016, |
| "learning_rate": 1.5648112105877773e-05, |
| "loss": 1.0236, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.9229573496551055, |
| "grad_norm": 0.20302869379520416, |
| "learning_rate": 1.5570260801868432e-05, |
| "loss": 0.9982, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.9233459632760128, |
| "grad_norm": 0.21155263483524323, |
| "learning_rate": 1.549240949785909e-05, |
| "loss": 1.0249, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.9237345768969203, |
| "grad_norm": 0.20336754620075226, |
| "learning_rate": 1.541455819384975e-05, |
| "loss": 1.0223, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.9241231905178277, |
| "grad_norm": 0.20189301669597626, |
| "learning_rate": 1.5336706889840405e-05, |
| "loss": 1.0228, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.924511804138735, |
| "grad_norm": 0.1962178647518158, |
| "learning_rate": 1.5258855585831064e-05, |
| "loss": 1.0137, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.9249004177596425, |
| "grad_norm": 0.21523639559745789, |
| "learning_rate": 1.518100428182172e-05, |
| "loss": 1.0498, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.9252890313805499, |
| "grad_norm": 0.20537924766540527, |
| "learning_rate": 1.510315297781238e-05, |
| "loss": 0.9995, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.9256776450014573, |
| "grad_norm": 0.21170039474964142, |
| "learning_rate": 1.5025301673803035e-05, |
| "loss": 1.0953, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.9260662586223647, |
| "grad_norm": 0.20737627148628235, |
| "learning_rate": 1.4947450369793694e-05, |
| "loss": 0.9892, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.9264548722432722, |
| "grad_norm": 0.20684003829956055, |
| "learning_rate": 1.4869599065784354e-05, |
| "loss": 1.0468, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.9268434858641795, |
| "grad_norm": 0.20738738775253296, |
| "learning_rate": 1.479174776177501e-05, |
| "loss": 1.0436, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.927232099485087, |
| "grad_norm": 0.19740383327007294, |
| "learning_rate": 1.4713896457765669e-05, |
| "loss": 0.9528, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.9276207131059944, |
| "grad_norm": 0.20328152179718018, |
| "learning_rate": 1.4636045153756326e-05, |
| "loss": 1.0272, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.9280093267269017, |
| "grad_norm": 0.2008744776248932, |
| "learning_rate": 1.4558193849746985e-05, |
| "loss": 1.0441, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.9283979403478092, |
| "grad_norm": 0.19907627999782562, |
| "learning_rate": 1.448034254573764e-05, |
| "loss": 0.9929, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.9287865539687166, |
| "grad_norm": 0.20299683511257172, |
| "learning_rate": 1.44024912417283e-05, |
| "loss": 0.9749, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.929175167589624, |
| "grad_norm": 0.21035155653953552, |
| "learning_rate": 1.432463993771896e-05, |
| "loss": 1.0356, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.9295637812105314, |
| "grad_norm": 0.20862546563148499, |
| "learning_rate": 1.4246788633709615e-05, |
| "loss": 1.0594, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.9299523948314389, |
| "grad_norm": 0.20775675773620605, |
| "learning_rate": 1.4168937329700274e-05, |
| "loss": 0.9959, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.9303410084523462, |
| "grad_norm": 0.1970052868127823, |
| "learning_rate": 1.409108602569093e-05, |
| "loss": 0.9956, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.9307296220732537, |
| "grad_norm": 0.2167968600988388, |
| "learning_rate": 1.401323472168159e-05, |
| "loss": 1.0202, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.9311182356941611, |
| "grad_norm": 0.20822198688983917, |
| "learning_rate": 1.3935383417672245e-05, |
| "loss": 1.0067, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.9315068493150684, |
| "grad_norm": 0.2004898339509964, |
| "learning_rate": 1.3857532113662906e-05, |
| "loss": 1.0069, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.9318954629359759, |
| "grad_norm": 0.22808429598808289, |
| "learning_rate": 1.3779680809653561e-05, |
| "loss": 1.1032, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.9322840765568833, |
| "grad_norm": 0.19940750300884247, |
| "learning_rate": 1.370182950564422e-05, |
| "loss": 0.9965, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.9326726901777908, |
| "grad_norm": 0.21138110756874084, |
| "learning_rate": 1.3623978201634879e-05, |
| "loss": 0.986, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9330613037986981, |
| "grad_norm": 0.2118709534406662, |
| "learning_rate": 1.3546126897625536e-05, |
| "loss": 1.0672, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.9334499174196056, |
| "grad_norm": 0.22121763229370117, |
| "learning_rate": 1.3468275593616195e-05, |
| "loss": 1.0204, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.933838531040513, |
| "grad_norm": 0.20541204512119293, |
| "learning_rate": 1.3390424289606852e-05, |
| "loss": 1.0749, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.9342271446614204, |
| "grad_norm": 0.19598713517189026, |
| "learning_rate": 1.331257298559751e-05, |
| "loss": 0.9638, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.9346157582823278, |
| "grad_norm": 0.2157907783985138, |
| "learning_rate": 1.3234721681588166e-05, |
| "loss": 1.0312, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.9350043719032352, |
| "grad_norm": 0.19694723188877106, |
| "learning_rate": 1.3156870377578825e-05, |
| "loss": 1.0001, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.9353929855241426, |
| "grad_norm": 0.209597647190094, |
| "learning_rate": 1.3079019073569482e-05, |
| "loss": 0.9808, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.93578159914505, |
| "grad_norm": 0.2026679664850235, |
| "learning_rate": 1.300116776956014e-05, |
| "loss": 0.9938, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 0.20847374200820923, |
| "learning_rate": 1.29233164655508e-05, |
| "loss": 0.9948, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.9365588263868648, |
| "grad_norm": 0.23478667438030243, |
| "learning_rate": 1.2845465161541457e-05, |
| "loss": 1.0549, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9369474400077723, |
| "grad_norm": 0.20954233407974243, |
| "learning_rate": 1.2767613857532116e-05, |
| "loss": 1.0336, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.9373360536286797, |
| "grad_norm": 0.2130623608827591, |
| "learning_rate": 1.2689762553522771e-05, |
| "loss": 1.0398, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.9377246672495871, |
| "grad_norm": 0.20076791942119598, |
| "learning_rate": 1.261191124951343e-05, |
| "loss": 0.9945, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.9381132808704945, |
| "grad_norm": 0.21280889213085175, |
| "learning_rate": 1.2534059945504087e-05, |
| "loss": 1.0538, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.938501894491402, |
| "grad_norm": 0.19909800589084625, |
| "learning_rate": 1.2456208641494746e-05, |
| "loss": 0.9783, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.9388905081123093, |
| "grad_norm": 0.21449251472949982, |
| "learning_rate": 1.2378357337485403e-05, |
| "loss": 1.0547, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.9392791217332167, |
| "grad_norm": 0.20742881298065186, |
| "learning_rate": 1.2300506033476062e-05, |
| "loss": 1.0471, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.9396677353541242, |
| "grad_norm": 0.21160250902175903, |
| "learning_rate": 1.2222654729466719e-05, |
| "loss": 1.0089, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.9400563489750315, |
| "grad_norm": 0.22055311501026154, |
| "learning_rate": 1.2144803425457376e-05, |
| "loss": 1.0201, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.940444962595939, |
| "grad_norm": 0.21073050796985626, |
| "learning_rate": 1.2066952121448035e-05, |
| "loss": 1.0025, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9408335762168464, |
| "grad_norm": 0.19758272171020508, |
| "learning_rate": 1.1989100817438692e-05, |
| "loss": 0.9643, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.9412221898377539, |
| "grad_norm": 0.20312103629112244, |
| "learning_rate": 1.191124951342935e-05, |
| "loss": 1.023, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.9416108034586612, |
| "grad_norm": 0.19969260692596436, |
| "learning_rate": 1.183339820942001e-05, |
| "loss": 0.9623, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.9419994170795687, |
| "grad_norm": 0.21867750585079193, |
| "learning_rate": 1.1755546905410667e-05, |
| "loss": 1.0895, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.942388030700476, |
| "grad_norm": 0.19672009348869324, |
| "learning_rate": 1.1677695601401324e-05, |
| "loss": 1.0253, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.9427766443213834, |
| "grad_norm": 0.20442704856395721, |
| "learning_rate": 1.1599844297391983e-05, |
| "loss": 1.0515, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.9431652579422909, |
| "grad_norm": 0.2008974254131317, |
| "learning_rate": 1.152199299338264e-05, |
| "loss": 0.9934, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.9435538715631983, |
| "grad_norm": 0.20074884593486786, |
| "learning_rate": 1.1444141689373297e-05, |
| "loss": 0.9792, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.9439424851841057, |
| "grad_norm": 0.1945987194776535, |
| "learning_rate": 1.1366290385363956e-05, |
| "loss": 0.991, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.9443310988050131, |
| "grad_norm": 0.2123355269432068, |
| "learning_rate": 1.1288439081354613e-05, |
| "loss": 0.9768, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.9447197124259206, |
| "grad_norm": 0.19462116062641144, |
| "learning_rate": 1.1210587777345272e-05, |
| "loss": 1.0221, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.9451083260468279, |
| "grad_norm": 0.21487726271152496, |
| "learning_rate": 1.1132736473335929e-05, |
| "loss": 1.0273, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.9454969396677354, |
| "grad_norm": 0.2011580765247345, |
| "learning_rate": 1.1054885169326588e-05, |
| "loss": 1.0065, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.9458855532886428, |
| "grad_norm": 0.2009819597005844, |
| "learning_rate": 1.0977033865317245e-05, |
| "loss": 1.04, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.9462741669095501, |
| "grad_norm": 0.20142634212970734, |
| "learning_rate": 1.0899182561307902e-05, |
| "loss": 1.0101, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.9466627805304576, |
| "grad_norm": 0.20323152840137482, |
| "learning_rate": 1.082133125729856e-05, |
| "loss": 1.0039, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.947051394151365, |
| "grad_norm": 0.18746018409729004, |
| "learning_rate": 1.0743479953289218e-05, |
| "loss": 0.9876, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.9474400077722724, |
| "grad_norm": 0.20016197860240936, |
| "learning_rate": 1.0665628649279875e-05, |
| "loss": 1.0067, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.9478286213931798, |
| "grad_norm": 0.19872961938381195, |
| "learning_rate": 1.0587777345270534e-05, |
| "loss": 0.9884, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.9482172350140873, |
| "grad_norm": 0.20647788047790527, |
| "learning_rate": 1.0509926041261193e-05, |
| "loss": 1.0088, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9486058486349946, |
| "grad_norm": 0.20790119469165802, |
| "learning_rate": 1.043207473725185e-05, |
| "loss": 1.0201, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.9489944622559021, |
| "grad_norm": 0.20318609476089478, |
| "learning_rate": 1.0354223433242507e-05, |
| "loss": 1.0199, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.9493830758768095, |
| "grad_norm": 0.21426942944526672, |
| "learning_rate": 1.0276372129233166e-05, |
| "loss": 1.0047, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.9497716894977168, |
| "grad_norm": 0.3223714828491211, |
| "learning_rate": 1.0198520825223823e-05, |
| "loss": 1.0532, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.9501603031186243, |
| "grad_norm": 0.2070651799440384, |
| "learning_rate": 1.012066952121448e-05, |
| "loss": 1.0576, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.9505489167395317, |
| "grad_norm": 0.20618025958538055, |
| "learning_rate": 1.0042818217205139e-05, |
| "loss": 1.061, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.9509375303604392, |
| "grad_norm": 0.20535731315612793, |
| "learning_rate": 9.964966913195796e-06, |
| "loss": 0.9923, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.9513261439813465, |
| "grad_norm": 0.21038392186164856, |
| "learning_rate": 9.887115609186455e-06, |
| "loss": 1.0257, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.951714757602254, |
| "grad_norm": 0.20872676372528076, |
| "learning_rate": 9.809264305177112e-06, |
| "loss": 1.0147, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.9521033712231614, |
| "grad_norm": 0.40158966183662415, |
| "learning_rate": 9.73141300116777e-06, |
| "loss": 1.0071, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9524919848440688, |
| "grad_norm": 0.1991165280342102, |
| "learning_rate": 9.653561697158428e-06, |
| "loss": 0.9829, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.9528805984649762, |
| "grad_norm": 0.1965460628271103, |
| "learning_rate": 9.575710393149085e-06, |
| "loss": 1.0286, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.9532692120858836, |
| "grad_norm": 0.20879510045051575, |
| "learning_rate": 9.497859089139744e-06, |
| "loss": 1.0707, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.953657825706791, |
| "grad_norm": 0.19594980776309967, |
| "learning_rate": 9.420007785130401e-06, |
| "loss": 0.9946, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.9540464393276984, |
| "grad_norm": 0.19754594564437866, |
| "learning_rate": 9.342156481121058e-06, |
| "loss": 0.9737, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.9544350529486059, |
| "grad_norm": 0.21339558064937592, |
| "learning_rate": 9.264305177111717e-06, |
| "loss": 1.0505, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.9548236665695132, |
| "grad_norm": 0.20371811091899872, |
| "learning_rate": 9.186453873102376e-06, |
| "loss": 1.0594, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.9552122801904207, |
| "grad_norm": 0.20965653657913208, |
| "learning_rate": 9.108602569093033e-06, |
| "loss": 1.0639, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.9556008938113281, |
| "grad_norm": 0.20316167175769806, |
| "learning_rate": 9.030751265083692e-06, |
| "loss": 1.0219, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.9559895074322355, |
| "grad_norm": 0.19921238720417023, |
| "learning_rate": 8.952899961074349e-06, |
| "loss": 1.0399, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9563781210531429, |
| "grad_norm": 0.196847602725029, |
| "learning_rate": 8.875048657065006e-06, |
| "loss": 0.9678, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.9567667346740504, |
| "grad_norm": 0.20746973156929016, |
| "learning_rate": 8.797197353055665e-06, |
| "loss": 1.0365, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.9571553482949577, |
| "grad_norm": 0.3297490179538727, |
| "learning_rate": 8.719346049046322e-06, |
| "loss": 1.0028, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.9575439619158651, |
| "grad_norm": 0.2101137936115265, |
| "learning_rate": 8.641494745036979e-06, |
| "loss": 1.0627, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.9579325755367726, |
| "grad_norm": 0.2444445937871933, |
| "learning_rate": 8.563643441027638e-06, |
| "loss": 0.9866, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.9583211891576799, |
| "grad_norm": 0.20323987305164337, |
| "learning_rate": 8.485792137018297e-06, |
| "loss": 1.0123, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.9587098027785874, |
| "grad_norm": 0.21334567666053772, |
| "learning_rate": 8.407940833008954e-06, |
| "loss": 1.0492, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.9590984163994948, |
| "grad_norm": 0.19852736592292786, |
| "learning_rate": 8.330089528999611e-06, |
| "loss": 1.0303, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.9594870300204023, |
| "grad_norm": 0.1995389610528946, |
| "learning_rate": 8.25223822499027e-06, |
| "loss": 0.9758, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.9598756436413096, |
| "grad_norm": 0.19799165427684784, |
| "learning_rate": 8.174386920980927e-06, |
| "loss": 0.9541, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9602642572622171, |
| "grad_norm": 0.21066170930862427, |
| "learning_rate": 8.096535616971584e-06, |
| "loss": 1.0389, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.9606528708831245, |
| "grad_norm": 0.19671034812927246, |
| "learning_rate": 8.018684312962243e-06, |
| "loss": 0.9791, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.9610414845040318, |
| "grad_norm": 0.2106933444738388, |
| "learning_rate": 7.9408330089529e-06, |
| "loss": 0.9479, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.9614300981249393, |
| "grad_norm": 0.20396657288074493, |
| "learning_rate": 7.862981704943557e-06, |
| "loss": 1.0068, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.9618187117458467, |
| "grad_norm": 0.19684381783008575, |
| "learning_rate": 7.785130400934216e-06, |
| "loss": 1.0347, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.9622073253667541, |
| "grad_norm": 0.19494709372520447, |
| "learning_rate": 7.707279096924875e-06, |
| "loss": 0.9997, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.9625959389876615, |
| "grad_norm": 0.21996809542179108, |
| "learning_rate": 7.629427792915532e-06, |
| "loss": 1.0517, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.962984552608569, |
| "grad_norm": 0.2083420753479004, |
| "learning_rate": 7.55157648890619e-06, |
| "loss": 1.0483, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.9633731662294763, |
| "grad_norm": 0.2018081396818161, |
| "learning_rate": 7.473725184896847e-06, |
| "loss": 1.0167, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.9637617798503838, |
| "grad_norm": 0.22427868843078613, |
| "learning_rate": 7.395873880887505e-06, |
| "loss": 0.9759, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.9641503934712912, |
| "grad_norm": 0.2190699577331543, |
| "learning_rate": 7.318022576878163e-06, |
| "loss": 1.049, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.9645390070921985, |
| "grad_norm": 0.2035333812236786, |
| "learning_rate": 7.24017127286882e-06, |
| "loss": 1.0556, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.964927620713106, |
| "grad_norm": 0.20165729522705078, |
| "learning_rate": 7.16231996885948e-06, |
| "loss": 0.9958, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.9653162343340134, |
| "grad_norm": 0.20284077525138855, |
| "learning_rate": 7.084468664850137e-06, |
| "loss": 1.0146, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.9657048479549208, |
| "grad_norm": 0.1984403133392334, |
| "learning_rate": 7.006617360840795e-06, |
| "loss": 0.9797, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.9660934615758282, |
| "grad_norm": 0.22276800870895386, |
| "learning_rate": 6.928766056831453e-06, |
| "loss": 1.042, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.9664820751967357, |
| "grad_norm": 0.18282116949558258, |
| "learning_rate": 6.85091475282211e-06, |
| "loss": 0.9681, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.966870688817643, |
| "grad_norm": 0.19382023811340332, |
| "learning_rate": 6.773063448812768e-06, |
| "loss": 0.9991, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.9672593024385505, |
| "grad_norm": 0.2009381204843521, |
| "learning_rate": 6.695212144803426e-06, |
| "loss": 1.0061, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.9676479160594579, |
| "grad_norm": 0.2232959270477295, |
| "learning_rate": 6.617360840794083e-06, |
| "loss": 1.0776, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9680365296803652, |
| "grad_norm": 0.2164563238620758, |
| "learning_rate": 6.539509536784741e-06, |
| "loss": 1.0834, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.9684251433012727, |
| "grad_norm": 0.2053539901971817, |
| "learning_rate": 6.4616582327754e-06, |
| "loss": 1.0449, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.9688137569221801, |
| "grad_norm": 0.23249384760856628, |
| "learning_rate": 6.383806928766058e-06, |
| "loss": 1.0418, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.9692023705430876, |
| "grad_norm": 0.18624578416347504, |
| "learning_rate": 6.305955624756715e-06, |
| "loss": 0.9152, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.9695909841639949, |
| "grad_norm": 0.2001798450946808, |
| "learning_rate": 6.228104320747373e-06, |
| "loss": 1.0084, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.9699795977849024, |
| "grad_norm": 0.2341216653585434, |
| "learning_rate": 6.150253016738031e-06, |
| "loss": 0.9935, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.9703682114058098, |
| "grad_norm": 0.21359120309352875, |
| "learning_rate": 6.072401712728688e-06, |
| "loss": 1.0498, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.9707568250267172, |
| "grad_norm": 0.21405139565467834, |
| "learning_rate": 5.994550408719346e-06, |
| "loss": 1.096, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.9711454386476246, |
| "grad_norm": 0.2035064846277237, |
| "learning_rate": 5.916699104710005e-06, |
| "loss": 1.0351, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.9715340522685321, |
| "grad_norm": 0.19452853500843048, |
| "learning_rate": 5.838847800700662e-06, |
| "loss": 0.9994, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2574, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.1321276344029348e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|