{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.82857142857143,
  "eval_steps": 500,
  "global_step": 430,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.02, "learning_rate": 1.5384615384615387e-05, "loss": 5.1473, "step": 1 },
    { "epoch": 0.05, "learning_rate": 3.0769230769230774e-05, "loss": 5.1654, "step": 2 },
    { "epoch": 0.07, "learning_rate": 4.615384615384616e-05, "loss": 4.4404, "step": 3 },
    { "epoch": 0.09, "learning_rate": 6.153846153846155e-05, "loss": 2.1218, "step": 4 },
    { "epoch": 0.11, "learning_rate": 7.692307692307693e-05, "loss": 0.7891, "step": 5 },
    { "epoch": 0.14, "learning_rate": 9.230769230769232e-05, "loss": 0.4084, "step": 6 },
    { "epoch": 0.16, "learning_rate": 0.0001076923076923077, "loss": 0.2285, "step": 7 },
    { "epoch": 0.18, "learning_rate": 0.0001230769230769231, "loss": 0.2082, "step": 8 },
    { "epoch": 0.21, "learning_rate": 0.00013846153846153847, "loss": 0.1934, "step": 9 },
    { "epoch": 0.23, "learning_rate": 0.00015384615384615385, "loss": 0.1951, "step": 10 },
    { "epoch": 0.25, "learning_rate": 0.00016923076923076923, "loss": 0.1867, "step": 11 },
    { "epoch": 0.27, "learning_rate": 0.00018461538461538463, "loss": 0.1931, "step": 12 },
    { "epoch": 0.3, "learning_rate": 0.0002, "loss": 0.1895, "step": 13 },
    { "epoch": 0.32, "learning_rate": 0.00019999716210981734, "loss": 0.16, "step": 14 },
    { "epoch": 0.34, "learning_rate": 0.00019998864860034169, "loss": 0.2126, "step": 15 },
    { "epoch": 0.37, "learning_rate": 0.0001999744599547812, "loss": 0.1772, "step": 16 },
    { "epoch": 0.39, "learning_rate": 0.0001999545969784522, "loss": 0.1949, "step": 17 },
    { "epoch": 0.41, "learning_rate": 0.00019992906079873365, "loss": 0.1923, "step": 18 },
    { "epoch": 0.43, "learning_rate": 0.00019989785286500295, "loss": 0.1709, "step": 19 },
    { "epoch": 0.46, "learning_rate": 0.0001998609749485539, "loss": 0.1823, "step": 20 },
    { "epoch": 0.48, "learning_rate": 0.0001998184291424961, "loss": 0.1656, "step": 21 },
    { "epoch": 0.5, "learning_rate": 0.00019977021786163598, "loss": 0.194, "step": 22 },
    { "epoch": 0.53, "learning_rate": 0.00019971634384234003, "loss": 0.174, "step": 23 },
    { "epoch": 0.55, "learning_rate": 0.00019965681014237917, "loss": 0.1699, "step": 24 },
    { "epoch": 0.57, "learning_rate": 0.00019959162014075553, "loss": 0.1771, "step": 25 },
    { "epoch": 0.59, "learning_rate": 0.00019952077753751036, "loss": 0.1942, "step": 26 },
    { "epoch": 0.62, "learning_rate": 0.00019944428635351426, "loss": 0.1818, "step": 27 },
    { "epoch": 0.64, "learning_rate": 0.00019936215093023884, "loss": 0.1956, "step": 28 },
    { "epoch": 0.66, "learning_rate": 0.0001992743759295103, "loss": 0.1738, "step": 29 },
    { "epoch": 0.69, "learning_rate": 0.00019918096633324492, "loss": 0.1897, "step": 30 },
    { "epoch": 0.71, "learning_rate": 0.0001990819274431662, "loss": 0.1558, "step": 31 },
    { "epoch": 0.73, "learning_rate": 0.00019897726488050406, "loss": 0.2183, "step": 32 },
    { "epoch": 0.75, "learning_rate": 0.00019886698458567562, "loss": 0.1844, "step": 33 },
    { "epoch": 0.78, "learning_rate": 0.00019875109281794825, "loss": 0.1774, "step": 34 },
    { "epoch": 0.8, "learning_rate": 0.00019862959615508417, "loss": 0.1709, "step": 35 },
    { "epoch": 0.82, "learning_rate": 0.00019850250149296703, "loss": 0.2023, "step": 36 },
    { "epoch": 0.85, "learning_rate": 0.00019836981604521076, "loss": 0.1717, "step": 37 },
    { "epoch": 0.87, "learning_rate": 0.00019823154734274997, "loss": 0.1751, "step": 38 },
    { "epoch": 0.89, "learning_rate": 0.0001980877032334125, "loss": 0.1903, "step": 39 },
    { "epoch": 0.91, "learning_rate": 0.00019793829188147406, "loss": 0.2017, "step": 40 },
    { "epoch": 0.94, "learning_rate": 0.00019778332176719483, "loss": 0.1651, "step": 41 },
    { "epoch": 0.96, "learning_rate": 0.00019762280168633814, "loss": 0.1913, "step": 42 },
    { "epoch": 0.98, "learning_rate": 0.0001974567407496712, "loss": 0.1749, "step": 43 },
    { "epoch": 1.01, "learning_rate": 0.0001972851483824481, "loss": 0.1953, "step": 44 },
    { "epoch": 1.03, "learning_rate": 0.00019710803432387465, "loss": 0.2126, "step": 45 },
    { "epoch": 1.05, "learning_rate": 0.00019692540862655585, "loss": 0.181, "step": 46 },
    { "epoch": 1.07, "learning_rate": 0.0001967372816559252, "loss": 0.176, "step": 47 },
    { "epoch": 1.1, "learning_rate": 0.00019654366408965635, "loss": 0.1655, "step": 48 },
    { "epoch": 1.12, "learning_rate": 0.00019634456691705702, "loss": 0.1676, "step": 49 },
    { "epoch": 1.14, "learning_rate": 0.00019614000143844558, "loss": 0.1438, "step": 50 },
    { "epoch": 1.17, "learning_rate": 0.0001959299792645092, "loss": 0.1915, "step": 51 },
    { "epoch": 1.19, "learning_rate": 0.00019571451231564525, "loss": 0.2314, "step": 52 },
    { "epoch": 1.21, "learning_rate": 0.00019549361282128445, "loss": 0.1736, "step": 53 },
    { "epoch": 1.23, "learning_rate": 0.00019526729331919697, "loss": 0.1765, "step": 54 },
    { "epoch": 1.26, "learning_rate": 0.00019503556665478067, "loss": 0.1969, "step": 55 },
    { "epoch": 1.28, "learning_rate": 0.00019479844598033202, "loss": 0.1968, "step": 56 },
    { "epoch": 1.3, "learning_rate": 0.0001945559447542998, "loss": 0.1724, "step": 57 },
    { "epoch": 1.33, "learning_rate": 0.00019430807674052092, "loss": 0.2096, "step": 58 },
    { "epoch": 1.35, "learning_rate": 0.00019405485600743942, "loss": 0.1861, "step": 59 },
    { "epoch": 1.37, "learning_rate": 0.00019379629692730798, "loss": 0.1752, "step": 60 },
    { "epoch": 1.39, "learning_rate": 0.00019353241417537214, "loss": 0.1746, "step": 61 },
    { "epoch": 1.42, "learning_rate": 0.00019326322272903722, "loss": 0.175, "step": 62 },
    { "epoch": 1.44, "learning_rate": 0.00019298873786701857, "loss": 0.1752, "step": 63 },
    { "epoch": 1.46, "learning_rate": 0.00019270897516847403, "loss": 0.1719, "step": 64 },
    { "epoch": 1.49, "learning_rate": 0.00019242395051212, "loss": 0.1814, "step": 65 },
    { "epoch": 1.51, "learning_rate": 0.00019213368007532986, "loss": 0.1792, "step": 66 },
    { "epoch": 1.53, "learning_rate": 0.00019183818033321614, "loss": 0.1683, "step": 67 },
    { "epoch": 1.55, "learning_rate": 0.00019153746805769512, "loss": 0.175, "step": 68 },
    { "epoch": 1.58, "learning_rate": 0.00019123156031653515, "loss": 0.1558, "step": 69 },
    { "epoch": 1.6, "learning_rate": 0.00019092047447238773, "loss": 0.1907, "step": 70 },
    { "epoch": 1.62, "learning_rate": 0.00019060422818180207, "loss": 0.2572, "step": 71 },
    { "epoch": 1.65, "learning_rate": 0.00019028283939422308, "loss": 0.1781, "step": 72 },
    { "epoch": 1.67, "learning_rate": 0.0001899563263509725, "loss": 0.1659, "step": 73 },
    { "epoch": 1.69, "learning_rate": 0.00018962470758421342, "loss": 0.174, "step": 74 },
    { "epoch": 1.71, "learning_rate": 0.0001892880019158988, "loss": 0.1795, "step": 75 },
    { "epoch": 1.74, "learning_rate": 0.00018894622845670283, "loss": 0.1712, "step": 76 },
    { "epoch": 1.76, "learning_rate": 0.00018859940660493634, "loss": 0.1628, "step": 77 },
    { "epoch": 1.78, "learning_rate": 0.00018824755604544594, "loss": 0.1901, "step": 78 },
    { "epoch": 1.81, "learning_rate": 0.0001878906967484966, "loss": 0.1549, "step": 79 },
    { "epoch": 1.83, "learning_rate": 0.0001875288489686382, "loss": 0.1972, "step": 80 },
    { "epoch": 1.85, "learning_rate": 0.00018716203324355607, "loss": 0.1758, "step": 81 },
    { "epoch": 1.87, "learning_rate": 0.00018679027039290497, "loss": 0.1772, "step": 82 },
    { "epoch": 1.9, "learning_rate": 0.0001864135815171279, "loss": 0.1839, "step": 83 },
    { "epoch": 1.92, "learning_rate": 0.00018603198799625807, "loss": 0.192, "step": 84 },
    { "epoch": 1.94, "learning_rate": 0.00018564551148870563, "loss": 0.1795, "step": 85 },
    { "epoch": 1.97, "learning_rate": 0.00018525417393002824, "loss": 0.1731, "step": 86 },
    { "epoch": 1.99, "learning_rate": 0.00018485799753168634, "loss": 0.1965, "step": 87 },
    { "epoch": 2.01, "learning_rate": 0.00018445700477978205, "loss": 0.214, "step": 88 },
    { "epoch": 2.03, "learning_rate": 0.0001840512184337833, "loss": 0.2024, "step": 89 },
    { "epoch": 2.06, "learning_rate": 0.00018364066152523183, "loss": 0.1786, "step": 90 },
    { "epoch": 2.08, "learning_rate": 0.00018322535735643605, "loss": 0.1938, "step": 91 },
    { "epoch": 2.1, "learning_rate": 0.00018280532949914842, "loss": 0.1634, "step": 92 },
    { "epoch": 2.13, "learning_rate": 0.0001823806017932276, "loss": 0.1655, "step": 93 },
    { "epoch": 2.15, "learning_rate": 0.00018195119834528534, "loss": 0.1635, "step": 94 },
    { "epoch": 2.17, "learning_rate": 0.00018151714352731822, "loss": 0.1938, "step": 95 },
    { "epoch": 2.19, "learning_rate": 0.00018107846197532433, "loss": 0.1696, "step": 96 },
    { "epoch": 2.22, "learning_rate": 0.00018063517858790516, "loss": 0.1806, "step": 97 },
    { "epoch": 2.24, "learning_rate": 0.00018018731852485206, "loss": 0.1826, "step": 98 },
    { "epoch": 2.26, "learning_rate": 0.00017973490720571864, "loss": 0.1976, "step": 99 },
    { "epoch": 2.29, "learning_rate": 0.00017927797030837768, "loss": 0.1815, "step": 100 },
    { "epoch": 2.31, "learning_rate": 0.00017881653376756394, "loss": 0.1818, "step": 101 },
    { "epoch": 2.33, "learning_rate": 0.0001783506237734019, "loss": 0.1649, "step": 102 },
    { "epoch": 2.35, "learning_rate": 0.00017788026676991963, "loss": 0.1684, "step": 103 },
    { "epoch": 2.38, "learning_rate": 0.00017740548945354752, "loss": 0.1517, "step": 104 },
    { "epoch": 2.4, "learning_rate": 0.00017692631877160326, "loss": 0.1871, "step": 105 },
    { "epoch": 2.42, "learning_rate": 0.0001764427819207624, "loss": 0.1921, "step": 106 },
    { "epoch": 2.45, "learning_rate": 0.0001759549063455145, "loss": 0.1659, "step": 107 },
    { "epoch": 2.47, "learning_rate": 0.00017546271973660574, "loss": 0.1807, "step": 108 },
    { "epoch": 2.49, "learning_rate": 0.000174966250029467, "loss": 0.1808, "step": 109 },
    { "epoch": 2.51, "learning_rate": 0.00017446552540262844, "loss": 0.1844, "step": 110 },
    { "epoch": 2.54, "learning_rate": 0.0001739605742761201, "loss": 0.174, "step": 111 },
    { "epoch": 2.56, "learning_rate": 0.00017345142530985887, "loss": 0.1752, "step": 112 },
    { "epoch": 2.58, "learning_rate": 0.00017293810740202182, "loss": 0.1788, "step": 113 },
    { "epoch": 2.61, "learning_rate": 0.00017242064968740598, "loss": 0.1748, "step": 114 },
    { "epoch": 2.63, "learning_rate": 0.00017189908153577473, "loss": 0.1711, "step": 115 },
    { "epoch": 2.65, "learning_rate": 0.0001713734325501908, "loss": 0.1741, "step": 116 },
    { "epoch": 2.67, "learning_rate": 0.00017084373256533603, "loss": 0.1779, "step": 117 },
    { "epoch": 2.7, "learning_rate": 0.00017031001164581828, "loss": 0.1761, "step": 118 },
    { "epoch": 2.72, "learning_rate": 0.00016977230008446466, "loss": 0.1771, "step": 119 },
    { "epoch": 2.74, "learning_rate": 0.00016923062840060234, "loss": 0.1682, "step": 120 },
    { "epoch": 2.77, "learning_rate": 0.00016868502733832644, "loss": 0.175, "step": 121 },
    { "epoch": 2.79, "learning_rate": 0.00016813552786475495, "loss": 0.1804, "step": 122 },
    { "epoch": 2.81, "learning_rate": 0.00016758216116827105, "loss": 0.1724, "step": 123 },
    { "epoch": 2.83, "learning_rate": 0.0001670249586567531, "loss": 0.2074, "step": 124 },
    { "epoch": 2.86, "learning_rate": 0.00016646395195579178, "loss": 0.1881, "step": 125 },
    { "epoch": 2.88, "learning_rate": 0.00016589917290689532, "loss": 0.1791, "step": 126 },
    { "epoch": 2.9, "learning_rate": 0.00016533065356568206, "loss": 0.1841, "step": 127 },
    { "epoch": 2.93, "learning_rate": 0.00016475842620006118, "loss": 0.1779, "step": 128 },
    { "epoch": 2.95, "learning_rate": 0.0001641825232884011, "loss": 0.1812, "step": 129 },
    { "epoch": 2.97, "learning_rate": 0.0001636029775176862, "loss": 0.1699, "step": 130 },
    { "epoch": 2.99, "learning_rate": 0.0001630198217816616, "loss": 0.1741, "step": 131 },
    { "epoch": 3.02, "learning_rate": 0.000162433089178966, "loss": 0.1683, "step": 132 },
    { "epoch": 3.04, "learning_rate": 0.0001618428130112533, "loss": 0.1808, "step": 133 },
    { "epoch": 3.06, "learning_rate": 0.0001612490267813023, "loss": 0.1663, "step": 134 },
    { "epoch": 3.09, "learning_rate": 0.0001606517641911153, "loss": 0.1684, "step": 135 },
    { "epoch": 3.11, "learning_rate": 0.00016005105914000507, "loss": 0.1675, "step": 136 },
    { "epoch": 3.13, "learning_rate": 0.00015944694572267096, "loss": 0.1706, "step": 137 },
    { "epoch": 3.15, "learning_rate": 0.00015883945822726372, "loss": 0.1773, "step": 138 },
    { "epoch": 3.18, "learning_rate": 0.00015822863113343935, "loss": 0.1763, "step": 139 },
    { "epoch": 3.2, "learning_rate": 0.00015761449911040208, "loss": 0.1799, "step": 140 },
    { "epoch": 3.22, "learning_rate": 0.00015699709701493667, "loss": 0.1684, "step": 141 },
    { "epoch": 3.25, "learning_rate": 0.0001563764598894301, "loss": 0.1742, "step": 142 },
    { "epoch": 3.27, "learning_rate": 0.0001557526229598824, "loss": 0.1751, "step": 143 },
    { "epoch": 3.29, "learning_rate": 0.0001551256216339076, "loss": 0.1754, "step": 144 },
    { "epoch": 3.31, "learning_rate": 0.00015449549149872376, "loss": 0.1764, "step": 145 },
    { "epoch": 3.34, "learning_rate": 0.00015386226831913348, "loss": 0.1703, "step": 146 },
    { "epoch": 3.36, "learning_rate": 0.00015322598803549356, "loss": 0.1731, "step": 147 },
    { "epoch": 3.38, "learning_rate": 0.00015258668676167546, "loss": 0.1741, "step": 148 },
    { "epoch": 3.41, "learning_rate": 0.00015194440078301536, "loss": 0.1703, "step": 149 },
    { "epoch": 3.43, "learning_rate": 0.00015129916655425468, "loss": 0.167, "step": 150 },
    { "epoch": 3.45, "learning_rate": 0.00015065102069747118, "loss": 0.1876, "step": 151 },
    { "epoch": 3.47, "learning_rate": 0.00015000000000000001, "loss": 0.1761, "step": 152 },
    { "epoch": 3.5, "learning_rate": 0.00014934614141234618, "loss": 0.1592, "step": 153 },
    { "epoch": 3.52, "learning_rate": 0.000148689482046087, "loss": 0.1581, "step": 154 },
    { "epoch": 3.54, "learning_rate": 0.00014803005917176585, "loss": 0.1804, "step": 155 },
    { "epoch": 3.57, "learning_rate": 0.00014736791021677676, "loss": 0.1699, "step": 156 },
    { "epoch": 3.59, "learning_rate": 0.0001467030727632401, "loss": 0.2209, "step": 157 },
    { "epoch": 3.61, "learning_rate": 0.0001460355845458695, "loss": 0.177, "step": 158 },
    { "epoch": 3.63, "learning_rate": 0.00014536548344983016, "loss": 0.1828, "step": 159 },
    { "epoch": 3.66, "learning_rate": 0.00014469280750858854, "loss": 0.1725, "step": 160 },
    { "epoch": 3.68, "learning_rate": 0.00014401759490175362, "loss": 0.1645, "step": 161 },
    { "epoch": 3.7, "learning_rate": 0.00014333988395290992, "loss": 0.1754, "step": 162 },
    { "epoch": 3.73, "learning_rate": 0.00014265971312744252, "loss": 0.1867, "step": 163 },
    { "epoch": 3.75, "learning_rate": 0.00014197712103035346, "loss": 0.1735, "step": 164 },
    { "epoch": 3.77, "learning_rate": 0.00014129214640407102, "loss": 0.1767, "step": 165 },
    { "epoch": 3.79, "learning_rate": 0.00014060482812625055, "loss": 0.1657, "step": 166 },
    { "epoch": 3.82, "learning_rate": 0.0001399152052075679, "loss": 0.1734, "step": 167 },
    { "epoch": 3.84, "learning_rate": 0.00013922331678950525, "loss": 0.1821, "step": 168 },
    { "epoch": 3.86, "learning_rate": 0.00013852920214212964, "loss": 0.1839, "step": 169 },
    { "epoch": 3.89, "learning_rate": 0.00013783290066186391, "loss": 0.1958, "step": 170 },
    { "epoch": 3.91, "learning_rate": 0.00013713445186925075, "loss": 0.1815, "step": 171 },
    { "epoch": 3.93, "learning_rate": 0.00013643389540670962, "loss": 0.1716, "step": 172 },
    { "epoch": 3.95, "learning_rate": 0.00013573127103628667, "loss": 0.1688, "step": 173 },
    { "epoch": 3.98, "learning_rate": 0.00013502661863739793, "loss": 0.1664, "step": 174 },
    { "epoch": 4.0, "learning_rate": 0.00013431997820456592, "loss": 0.1638, "step": 175 },
    { "epoch": 4.02, "learning_rate": 0.0001336113898451496, "loss": 0.2074, "step": 176 },
    { "epoch": 4.05, "learning_rate": 0.0001329008937770679, "loss": 0.1675, "step": 177 },
    { "epoch": 4.07, "learning_rate": 0.0001321885303265172, "loss": 0.1556, "step": 178 },
    { "epoch": 4.09, "learning_rate": 0.00013147433992568227, "loss": 0.1653, "step": 179 },
    { "epoch": 4.11, "learning_rate": 0.00013075836311044175, "loss": 0.1603, "step": 180 },
    { "epoch": 4.14, "learning_rate": 0.0001300406405180671, "loss": 0.1758, "step": 181 },
    { "epoch": 4.16, "learning_rate": 0.0001293212128849163, "loss": 0.1949, "step": 182 },
    { "epoch": 4.18, "learning_rate": 0.00012860012104412165, "loss": 0.17, "step": 183 },
    { "epoch": 4.21, "learning_rate": 0.0001278774059232723, "loss": 0.1662, "step": 184 },
    { "epoch": 4.23, "learning_rate": 0.00012715310854209124, "loss": 0.1571, "step": 185 },
    { "epoch": 4.25, "learning_rate": 0.00012642727001010694, "loss": 0.1979, "step": 186 },
    { "epoch": 4.27, "learning_rate": 0.00012569993152432028, "loss": 0.1666, "step": 187 },
    { "epoch": 4.3, "learning_rate": 0.00012497113436686627, "loss": 0.1065, "step": 188 },
    { "epoch": 4.32, "learning_rate": 0.00012424091990267087, "loss": 0.1146, "step": 189 },
    { "epoch": 4.34, "learning_rate": 0.0001235093295771032, "loss": 0.1749, "step": 190 },
    { "epoch": 4.37, "learning_rate": 0.00012277640491362341, "loss": 0.1256, "step": 191 },
    { "epoch": 4.39, "learning_rate": 0.0001220421875114256, "loss": 0.1835, "step": 192 },
    { "epoch": 4.41, "learning_rate": 0.0001213067190430769, "loss": 0.1628, "step": 193 },
    { "epoch": 4.43, "learning_rate": 0.00012057004125215223, "loss": 0.256, "step": 194 },
    { "epoch": 4.46, "learning_rate": 0.00011983219595086506, "loss": 0.146, "step": 195 },
    { "epoch": 4.48, "learning_rate": 0.00011909322501769406, "loss": 0.1682, "step": 196 },
    { "epoch": 4.5, "learning_rate": 0.0001183531703950064, "loss": 0.1794, "step": 197 },
    { "epoch": 4.53, "learning_rate": 0.00011761207408667703, "loss": 0.1905, "step": 198 },
    { "epoch": 4.55, "learning_rate": 0.00011686997815570473, "loss": 0.1749, "step": 199 },
    { "epoch": 4.57, "learning_rate": 0.00011612692472182463, "loss": 0.1775, "step": 200 },
    { "epoch": 4.59, "learning_rate": 0.00011538295595911764, "loss": 0.1672, "step": 201 },
    { "epoch": 4.62, "learning_rate": 0.00011463811409361667, "loss": 0.2042, "step": 202 },
    { "epoch": 4.64, "learning_rate": 0.00011389244140091013, "loss": 0.1714, "step": 203 },
    { "epoch": 4.66, "learning_rate": 0.00011314598020374231, "loss": 0.1637, "step": 204 },
    { "epoch": 4.69, "learning_rate": 0.00011239877286961122, "loss": 0.1786, "step": 205 },
    { "epoch": 4.71, "learning_rate": 0.00011165086180836406, "loss": 0.175, "step": 206 },
    { "epoch": 4.73, "learning_rate": 0.00011090228946979, "loss": 0.1763, "step": 207 },
    { "epoch": 4.75, "learning_rate": 0.00011015309834121081, "loss": 0.1941, "step": 208 },
    { "epoch": 4.78, "learning_rate": 0.00010940333094506952, "loss": 0.1452, "step": 209 },
    { "epoch": 4.8, "learning_rate": 0.00010865302983651673, "loss": 0.1719, "step": 210 },
    { "epoch": 4.82, "learning_rate": 0.00010790223760099549, "loss": 0.1697, "step": 211 },
    { "epoch": 4.85, "learning_rate": 0.00010715099685182408, "loss": 0.1644, "step": 212 },
    { "epoch": 4.87, "learning_rate": 0.00010639935022777741, "loss": 0.1683, "step": 213 },
    { "epoch": 4.89, "learning_rate": 0.00010564734039066699, "loss": 0.1746, "step": 214 },
    { "epoch": 4.91, "learning_rate": 0.00010489501002291952, "loss": 0.1606, "step": 215 },
    { "epoch": 4.94, "learning_rate": 0.00010414240182515429, "loss": 0.1841, "step": 216 },
    { "epoch": 4.96, "learning_rate": 0.00010338955851375962, "loss": 0.1833, "step": 217 },
    { "epoch": 4.98, "learning_rate": 0.00010263652281846837, "loss": 0.1802, "step": 218 },
    { "epoch": 5.01, "learning_rate": 0.00010188333747993264, "loss": 0.1675, "step": 219 },
    { "epoch": 5.03, "learning_rate": 0.00010113004524729799, "loss": 0.1598, "step": 220 },
    { "epoch": 5.05, "learning_rate": 0.00010037668887577709, "loss": 0.1612, "step": 221 },
    { "epoch": 5.07, "learning_rate": 9.962331112422293e-05, "loss": 0.1812, "step": 222 },
    { "epoch": 5.1, "learning_rate": 9.886995475270205e-05, "loss": 0.1853, "step": 223 },
    { "epoch": 5.12, "learning_rate": 9.811666252006742e-05, "loss": 0.1369, "step": 224 },
    { "epoch": 5.14, "learning_rate": 9.73634771815317e-05, "loss": 0.1563, "step": 225 },
    { "epoch": 5.17, "learning_rate": 9.661044148624037e-05, "loss": 0.1466, "step": 226 },
    { "epoch": 5.19, "learning_rate": 9.58575981748457e-05, "loss": 0.1343, "step": 227 },
    { "epoch": 5.21, "learning_rate": 9.510498997708049e-05, "loss": 0.1231, "step": 228 },
    { "epoch": 5.23, "learning_rate": 9.435265960933302e-05, "loss": 0.1472, "step": 229 },
    { "epoch": 5.26, "learning_rate": 9.360064977222262e-05, "loss": 0.1681, "step": 230 },
    { "epoch": 5.28, "learning_rate": 9.284900314817597e-05, "loss": 0.2364, "step": 231 },
    { "epoch": 5.3, "learning_rate": 9.209776239900453e-05, "loss": 0.1228, "step": 232 },
    { "epoch": 5.33, "learning_rate": 9.134697016348327e-05, "loss": 0.1417, "step": 233 },
    { "epoch": 5.35, "learning_rate": 9.05966690549305e-05, "loss": 0.1512, "step": 234 },
    { "epoch": 5.37, "learning_rate": 8.984690165878921e-05, "loss": 0.1248, "step": 235 },
    { "epoch": 5.39, "learning_rate": 8.909771053021002e-05, "loss": 0.1252, "step": 236 },
    { "epoch": 5.42, "learning_rate": 8.834913819163595e-05, "loss": 0.1341, "step": 237 },
    { "epoch": 5.44, "learning_rate": 8.760122713038881e-05, "loss": 0.1644, "step": 238 },
    { "epoch": 5.46, "learning_rate": 8.685401979625774e-05, "loss": 0.0977, "step": 239 },
    { "epoch": 5.49, "learning_rate": 8.610755859908991e-05, "loss": 0.1699, "step": 240 },
    { "epoch": 5.51, "learning_rate": 8.536188590638334e-05, "loss": 0.1196, "step": 241 },
    { "epoch": 5.53, "learning_rate": 8.46170440408824e-05, "loss": 0.0777, "step": 242 },
    { "epoch": 5.55, "learning_rate": 8.387307527817539e-05, "loss": 0.1266, "step": 243 },
    { "epoch": 5.58, "learning_rate": 8.313002184429529e-05, "loss": 0.1463, "step": 244 },
    { "epoch": 5.6, "learning_rate": 8.238792591332299e-05, "loss": 0.1037, "step": 245 },
    { "epoch": 5.62, "learning_rate": 8.164682960499361e-05, "loss": 0.1385, "step": 246 },
    { "epoch": 5.65, "learning_rate": 8.090677498230596e-05, "loss": 0.0932, "step": 247 },
    { "epoch": 5.67, "learning_rate": 8.016780404913496e-05, "loss": 0.1294, "step": 248 },
    { "epoch": 5.69, "learning_rate": 7.942995874784776e-05, "loss": 0.191, "step": 249 },
    { "epoch": 5.71, "learning_rate": 7.869328095692312e-05, "loss": 0.1488, "step": 250 },
    { "epoch": 5.74, "learning_rate": 7.795781248857443e-05, "loss": 0.1259, "step": 251 },
    { "epoch": 5.76, "learning_rate": 7.72235950863766e-05, "loss": 0.1266, "step": 252 },
    { "epoch": 5.78, "learning_rate": 7.64906704228968e-05, "loss": 0.1172, "step": 253 },
    { "epoch": 5.81, "learning_rate": 7.575908009732918e-05, "loss": 0.1032, "step": 254 },
    { "epoch": 5.83, "learning_rate": 7.502886563313376e-05, "loss": 0.0891, "step": 255 },
    { "epoch": 5.85, "learning_rate": 7.430006847567972e-05, "loss": 0.0909, "step": 256 },
    { "epoch": 5.87, "learning_rate": 7.357272998989308e-05, "loss": 0.1367, "step": 257 },
    { "epoch": 5.9, "learning_rate": 7.284689145790878e-05, "loss": 0.0965, "step": 258 },
    { "epoch": 5.92, "learning_rate": 7.21225940767277e-05, "loss": 0.1868, "step": 259 },
    { "epoch": 5.94, "learning_rate": 7.139987895587836e-05, "loss": 0.3087, "step": 260 },
    { "epoch": 5.97, "learning_rate": 7.067878711508375e-05, "loss": 0.1388, "step": 261 },
    { "epoch": 5.99, "learning_rate": 6.995935948193294e-05, "loss": 0.142, "step": 262 },
    { "epoch": 6.01, "learning_rate": 6.924163688955825e-05, "loss": 0.1212, "step": 263 },
    { "epoch": 6.03, "learning_rate": 6.852566007431773e-05, "loss": 0.1369, "step": 264 },
    { "epoch": 6.06, "learning_rate": 6.781146967348284e-05, "loss": 0.0927, "step": 265 },
    { "epoch": 6.08, "learning_rate": 6.709910622293212e-05, "loss": 0.1146, "step": 266 },
    { "epoch": 6.1, "learning_rate": 6.638861015485043e-05, "loss": 0.1059, "step": 267 },
    { "epoch": 6.13, "learning_rate": 6.568002179543409e-05, "loss": 0.1108, "step": 268 },
    { "epoch": 6.15, "learning_rate": 6.497338136260209e-05, "loss": 0.1333, "step": 269 },
    { "epoch": 6.17, "learning_rate": 6.426872896371331e-05, "loss": 0.115, "step": 270 },
    { "epoch": 6.19, "learning_rate": 6.356610459329038e-05, "loss": 0.0776, "step": 271 },
    { "epoch": 6.22, "learning_rate": 6.286554813074925e-05, "loss": 0.1038, "step": 272 },
    { "epoch": 6.24, "learning_rate": 6.21670993381361e-05, "loss": 0.0796, "step": 273 },
    { "epoch": 6.26, "learning_rate": 6.147079785787038e-05, "loss": 0.0982, "step": 274 },
    { "epoch": 6.29, "learning_rate": 6.0776683210494766e-05, "loss": 0.114, "step": 275 },
    { "epoch": 6.31, "learning_rate": 6.0084794792432155e-05, "loss": 0.0922, "step": 276 },
    { "epoch": 6.33, "learning_rate": 5.93951718737495e-05, "loss": 0.0725, "step": 277 },
    { "epoch": 6.35, "learning_rate": 5.8707853595928985e-05, "loss": 0.0855, "step": 278 },
    { "epoch": 6.38, "learning_rate": 5.802287896964658e-05, "loss": 0.1254, "step": 279 },
    { "epoch": 6.4, "learning_rate": 5.734028687255751e-05, "loss": 0.1193, "step": 280 },
    { "epoch": 6.42, "learning_rate": 5.666011604709005e-05, "loss": 0.1212, "step": 281 },
    { "epoch": 6.45, "learning_rate": 5.598240509824642e-05, "loss": 0.1744, "step": 282 },
    { "epoch": 6.47, "learning_rate": 5.530719249141147e-05, "loss": 0.062, "step": 283 },
    { "epoch": 6.49, "learning_rate": 5.463451655016988e-05, "loss": 0.1408, "step": 284 },
    { "epoch": 6.51, "learning_rate": 5.39644154541305e-05, "loss": 0.0819, "step": 285 },
    { "epoch": 6.54, "learning_rate": 5.329692723675994e-05, "loss": 0.118, "step": 286 },
    { "epoch": 6.56, "learning_rate": 5.263208978322326e-05, "loss": 0.0602, "step": 287 },
    { "epoch": 6.58, "learning_rate": 5.1969940828234184e-05, "loss": 0.0708, "step": 288 },
    { "epoch": 6.61, "learning_rate": 5.131051795391302e-05, "loss": 0.107, "step": 289 },
    { "epoch": 6.63, "learning_rate": 5.065385858765383e-05, "loss": 0.0621, "step": 290 },
    { "epoch": 6.65, "learning_rate": 5.000000000000002e-05, "loss": 0.0428, "step": 291 },
    { "epoch": 6.67, "learning_rate": 4.934897930252886e-05, "loss": 0.111, "step": 292 },
    { "epoch": 6.7, "learning_rate": 4.870083344574531e-05, "loss": 0.1184, "step": 293 },
    { "epoch": 6.72, "learning_rate": 4.805559921698464e-05, "loss": 0.0919, "step": 294 },
    { "epoch": 6.74, "learning_rate": 4.7413313238324556e-05, "loss": 0.0477, "step": 295 },
    { "epoch": 6.77, "learning_rate": 4.6774011964506435e-05, "loss": 0.0738, "step": 296 },
    { "epoch": 6.79, "learning_rate": 4.613773168086657e-05, "loss": 0.101, "step": 297 },
    { "epoch": 6.81, "learning_rate": 4.550450850127625e-05, "loss": 0.0585, "step": 298 },
    { "epoch": 6.83, "learning_rate": 4.4874378366092476e-05, "loss": 0.0443, "step": 299 },
    { "epoch": 6.86, "learning_rate": 4.42473770401176e-05, "loss": 0.1272, "step": 300 },
    { "epoch": 6.88, "learning_rate": 4.3623540110569935e-05, "loss": 0.1569, "step": 301 },
    { "epoch": 6.9, "learning_rate": 4.300290298506333e-05, "loss": 0.0314, "step": 302 },
    { "epoch": 6.93, "learning_rate": 4.238550088959796e-05, "loss": 0.1179, "step": 303 },
    { "epoch": 6.95, "learning_rate": 4.1771368866560665e-05, "loss": 0.1037, "step": 304 },
    { "epoch": 6.97, "learning_rate": 4.116054177273627e-05, "loss": 0.0898, "step": 305 },
    { "epoch": 6.99, "learning_rate": 4.0553054277329074e-05, "loss": 0.1015, "step": 306 },
    { "epoch": 7.02, "learning_rate": 3.9948940859994966e-05, "loss": 0.0652, "step": 307 },
    { "epoch": 7.04, "learning_rate": 3.9348235808884724e-05, "loss": 0.0403, "step": 308 },
    { "epoch": 7.06, "learning_rate": 3.875097321869768e-05, "loss": 0.0501, "step": 309 },
    { "epoch": 7.09, "learning_rate": 3.815718698874672e-05, "loss": 0.0874, "step": 310 },
    { "epoch": 7.11, "learning_rate": 3.7566910821034005e-05, "loss": 0.0336, "step": 311 },
    { "epoch": 7.13, "learning_rate": 3.698017821833844e-05, "loss": 0.0606, "step": 312 },
    { "epoch": 7.15, "learning_rate": 3.6397022482313805e-05, "loss": 0.0154, "step": 313 },
    { "epoch": 7.18, "learning_rate": 3.5817476711598906e-05, "loss": 0.0232, "step": 314 },
    { "epoch": 7.2, "learning_rate": 3.524157379993882e-05, "loss": 0.0202, "step": 315 },
    { "epoch": 7.22, "learning_rate": 3.466934643431795e-05, "loss": 0.0991, "step": 316 },
    { "epoch": 7.25, "learning_rate": 3.4100827093104694e-05, "loss": 0.1159, "step": 317 },
    { "epoch": 7.27, "learning_rate": 3.353604804420821e-05, "loss": 0.012, "step": 318 },
    { "epoch": 7.29, "learning_rate": 3.2975041343246936e-05, "loss": 0.0735, "step": 319 },
    { "epoch": 7.31, "learning_rate": 3.241783883172895e-05, "loss": 0.0097, "step": 320 },
    { "epoch": 7.34, "learning_rate": 3.186447213524508e-05, "loss": 0.03, "step": 321 },
    { "epoch": 7.36, "learning_rate": 3.131497266167357e-05, "loss": 0.0764, "step": 322 },
    { "epoch": 7.38, "learning_rate": 3.076937159939768e-05, "loss": 0.0166, "step": 323 },
    { "epoch": 7.41, "learning_rate": 3.0227699915535367e-05, "loss": 0.1195, "step": 324 },
    { "epoch": 7.43, "learning_rate": 2.968998835418174e-05, "loss": 0.117, "step": 325 },
    { "epoch": 7.45, "learning_rate": 2.9156267434663963e-05, "loss": 0.0241, "step": 326 },
    { "epoch": 7.47, "learning_rate": 2.862656744980926e-05, "loss": 0.0874, "step": 327 },
    { "epoch": 7.5, "learning_rate": 2.81009184642253e-05, "loss": 0.062, "step": 328 },
    { "epoch": 7.52, "learning_rate": 2.757935031259402e-05, "loss": 0.0262, "step": 329 },
    { "epoch": 7.54, "learning_rate": 2.7061892597978177e-05, "loss": 0.1282, "step": 330 },
    { "epoch": 7.57, "learning_rate": 2.6548574690141125e-05, "loss": 0.0045, "step": 331 },
    { "epoch": 7.59, "learning_rate": 2.603942572387993e-05, "loss": 0.0423, "step": 332 },
    { "epoch": 7.61, "learning_rate": 2.553447459737157e-05, "loss": 0.0448, "step": 333 },
    { "epoch": 7.63, "learning_rate": 2.5033749970533015e-05, "loss": 0.0534, "step": 334 },
    { "epoch": 7.66, "learning_rate": 2.4537280263394258e-05, "loss": 0.04, "step": 335 },
    { "epoch": 7.68, "learning_rate": 2.4045093654485518e-05, "loss": 0.0356, "step": 336 },
    { "epoch": 7.7, "learning_rate": 2.355721807923761e-05, "loss": 0.0786, "step": 337 },
    { "epoch": 7.73, "learning_rate": 2.307368122839675e-05, "loss": 0.0441, "step": 338 },
    { "epoch": 7.75, "learning_rate": 2.2594510546452507e-05, "loss": 0.0155, "step": 339 },
    { "epoch": 7.77, "learning_rate": 2.2119733230080408e-05, "loss": 0.0217, "step": 340 },
    { "epoch": 7.79, "learning_rate": 2.1649376226598106e-05, "loss": 0.0472, "step": 341 },
    { "epoch": 7.82, "learning_rate": 2.1183466232436088e-05, "loss": 0.0354, "step": 342 },
    { "epoch": 7.84, "learning_rate": 2.0722029691622336e-05, "loss": 0.0702, "step": 343 },
    { "epoch": 7.86, "learning_rate": 2.026509279428137e-05, "loss": 0.0759, "step": 344 },
    { "epoch": 7.89, "learning_rate": 1.9812681475147942e-05, "loss": 0.1333, "step": 345 },
    { "epoch": 7.91, "learning_rate": 1.9364821412094857e-05, "loss": 0.0323, "step": 346 },
    { "epoch": 7.93, "learning_rate": 1.8921538024675678e-05, "loss": 0.0105, "step": 347 },
    { "epoch": 7.95, "learning_rate": 1.848285647268181e-05, "loss": 0.0554, "step": 348 },
    { "epoch": 7.98, "learning_rate": 1.8048801654714688e-05, "loss": 0.045, "step": 349 },
    { "epoch": 8.0, "learning_rate": 1.761939820677241e-05, "loss": 0.0068, "step": 350 },
    { "epoch": 8.02, "learning_rate": 1.7194670500851616e-05, "loss": 0.024, "step": 351 },
    { "epoch": 8.05, "learning_rate": 1.6774642643563953e-05, "loss": 0.0245, "step": 352 },
    { "epoch": 8.07, "learning_rate": 1.6359338474768193e-05, "loss": 0.0177, "step": 353 },
    { "epoch": 8.09, "learning_rate": 1.594878156621672e-05, "loss": 0.0234, "step": 354 },
    { "epoch": 8.11, "learning_rate": 1.554299522021796e-05, "loss": 0.0174, "step": 355 },
    { "epoch": 8.14, "learning_rate": 1.5142002468313699e-05, "loss": 0.0074, "step": 356 },
    { "epoch": 8.16, "learning_rate": 1.4745826069971758e-05, "loss": 0.0468, "step": 357 },
    { "epoch": 8.18, "learning_rate": 1.4354488511294417e-05, "loss": 0.0051, "step": 358 },
    { "epoch": 8.21, "learning_rate": 1.3968012003741948e-05, "loss": 0.0042, "step": 359 },
    { "epoch": 8.23, "learning_rate": 1.35864184828721e-05, "loss": 0.0071, "step": 360 },
    { "epoch": 8.25, "learning_rate": 1.3209729607095023e-05, "loss": 0.0074, "step": 361 },
    { "epoch": 8.27, "learning_rate": 1.2837966756443975e-05, "loss": 0.0087, "step": 362 },
    { "epoch": 8.3, "learning_rate": 1.2471151031361794e-05, "loss": 0.0081, "step": 363 },
    { "epoch": 8.32, "learning_rate": 1.2109303251503434e-05, "loss": 0.0068, "step": 364 },
    { "epoch": 8.34, "learning_rate": 1.1752443954554082e-05, "loss": 0.0068, "step": 365 },
    { "epoch": 8.37, "learning_rate": 1.1400593395063686e-05, "loss": 0.01, "step": 366 },
    { "epoch": 8.39, "learning_rate": 1.1053771543297198e-05, "loss": 0.0078, "step": 367 },
    { "epoch": 8.41, "learning_rate": 1.0711998084101205e-05, "loss": 0.0106, "step": 368 },
    { "epoch": 8.43, "learning_rate": 1.0375292415786575e-05, "loss": 0.0035, "step": 369 },
    { "epoch": 8.46, "learning_rate": 1.0043673649027518e-05, "loss": 0.0715, "step": 370 },
    { "epoch": 8.48, "learning_rate": 9.717160605776932e-06, "loss": 0.0093, "step": 371 },
    { "epoch": 8.5, "learning_rate": 9.39577181819794e-06, "loss": 0.0815, "step": 372 },
    { "epoch": 8.53, "learning_rate": 9.07952552761232e-06, "loss": 0.0086, "step": 373 },
    { "epoch": 8.55, "learning_rate": 8.768439683464868e-06, "loss": 0.0138, "step": 374 },
    { "epoch": 8.57, "learning_rate": 8.462531942304896e-06, "loss": 0.0132, "step": 375 },
    { "epoch": 8.59, "learning_rate": 8.161819666783888e-06, "loss": 0.0141, "step": 376 },
    { "epoch": 8.62, "learning_rate": 7.866319924670163e-06, "loss": 0.0477, "step": 377 },
    { "epoch": 8.64, "learning_rate": 7.576049487880033e-06, "loss": 0.0103, "step": 378 },
    { "epoch": 8.66, "learning_rate": 7.291024831525961e-06, "loss": 0.0044, "step": 379 },
    { "epoch": 8.69, "learning_rate": 7.011262132981456e-06, "loss": 0.051, "step": 380 },
    { "epoch": 8.71, "learning_rate": 6.7367772709627905e-06, "loss": 0.0031, "step": 381 },
    { "epoch": 8.73, "learning_rate": 6.467585824627887e-06, "loss": 0.016, "step": 382 },
    { "epoch": 8.75, "learning_rate": 6.203703072692013e-06, "loss": 0.0054, "step": 383 },
    { "epoch": 8.78, "learning_rate": 5.945143992560587e-06, "loss": 0.0042, "step": 384 },
    { "epoch": 8.8, "learning_rate": 5.691923259479093e-06, "loss": 0.0133, "step": 385 },
    { "epoch": 8.82, "learning_rate": 5.444055245700208e-06, "loss": 0.026, "step": 386 },
    { "epoch": 8.85, "learning_rate": 5.201554019667965e-06, "loss": 0.0347, "step": 387 },
    { "epoch": 8.87, "learning_rate": 4.964433345219355e-06, "loss": 0.0498, "step": 388 },
    { "epoch": 8.89, "learning_rate": 4.732706680803045e-06, "loss": 0.0218, "step": 389 },
    { "epoch": 8.91, "learning_rate": 4.506387178715565e-06, "loss": 0.0094, "step": 390 },
    { "epoch": 8.94, "learning_rate": 4.285487684354772e-06, "loss": 0.0798, "step": 391 },
    { "epoch": 8.96, "learning_rate": 4.070020735490809e-06, "loss": 0.0036, "step": 392 },
    { "epoch": 8.98, "learning_rate": 3.859998561554434e-06, "loss": 0.0086, "step": 393 },
    { "epoch": 9.01, "learning_rate": 3.655433082942972e-06, "loss": 0.0673, "step": 394 },
    { "epoch": 9.03, "learning_rate": 3.4563359103436886e-06, "loss": 0.0103, "step": 395 },
    { "epoch": 9.05, "learning_rate": 3.262718344074811e-06, "loss": 0.0038, "step": 396 },
    { "epoch": 9.07, "learning_rate": 3.0745913734441355e-06, "loss": 0.0127, "step": 397 },
    { "epoch": 9.1, "learning_rate": 2.891965676125352e-06, "loss": 0.0093, "step": 398 },
    { "epoch": 9.12, "learning_rate": 2.7148516175519277e-06, "loss": 0.0137, "step": 399 },
    { "epoch": 9.14, "learning_rate": 2.5432592503288e-06, "loss": 0.0027, "step": 400 },
    { "epoch": 9.17, "learning_rate": 2.377198313661877e-06, "loss": 0.0102, "step": 401 },
    { "epoch": 9.19, "learning_rate": 2.2166782328051803e-06, "loss": 0.0051, "step": 402 },
    { "epoch": 9.21, "learning_rate": 2.0617081185259512e-06, "loss": 0.0027, "step": 403 },
    { "epoch": 9.23, "learning_rate": 1.912296766587507e-06, "loss": 0.0074, "step": 404 },
    { "epoch": 9.26, "learning_rate": 1.7684526572500416e-06, "loss": 0.0029, "step": 405 },
    { "epoch": 9.28, "learning_rate": 1.6301839547892328e-06, "loss": 0.0106, "step": 406 },
    { "epoch": 9.3, "learning_rate": 1.4974985070329683e-06, "loss": 0.0243, "step": 407 },
    { "epoch": 9.33, "learning_rate": 1.3704038449158573e-06, "loss": 0.0031, "step": 408 },
    { "epoch": 9.35, "learning_rate": 1.2489071820517396e-06, "loss": 0.0041, "step": 409 },
    { "epoch": 9.37, "learning_rate": 1.1330154143243787e-06, "loss": 0.0096, "step": 410 },
    { "epoch": 9.39, "learning_rate": 1.0227351194959545e-06, "loss": 0.0103, "step": 411 },
    { "epoch": 9.42, "learning_rate": 9.180725568338044e-07, "loss": 0.0069, "step": 412 },
    { "epoch": 9.44, "learning_rate": 8.190336667550868e-07, "loss": 0.0048, "step": 413 },
    { "epoch": 9.46, "learning_rate": 7.256240704897166e-07, "loss": 0.0057, "step": 414 },
    { "epoch": 9.49, "learning_rate": 6.378490697611761e-07, "loss": 0.003, "step": 415 },
    { "epoch": 9.51, "learning_rate": 5.55713646485756e-07, "loss": 0.0067, "step": 416 },
    { "epoch": 9.53, "learning_rate": 4.79222462489648e-07, "loss": 0.0036, "step": 417 },
    { "epoch": 9.55, "learning_rate": 4.0837985924448984e-07, "loss": 0.0055, "step": 418 },
    { "epoch": 9.58, "learning_rate": 3.431898576208292e-07, "loss": 0.0027, "step": 419 },
    { "epoch": 9.6, "learning_rate": 2.836561576599839e-07, "loss": 0.0069, "step": 420 },
    { "epoch": 9.62, "learning_rate": 2.2978213836400975e-07, "loss": 0.0185, "step": 421 },
    { "epoch": 9.65, "learning_rate": 1.815708575038988e-07, "loss": 0.0238, "step": 422 },
    { "epoch": 9.67, "learning_rate": 1.3902505144608446e-07, "loss": 0.012, "step": 423 },
    { "epoch": 9.69, "learning_rate": 1.0214713499706597e-07, "loss": 0.003, "step": 424 },
    { "epoch": 9.71, "learning_rate": 7.093920126638454e-08, "loss": 0.0022, "step": 425 },
    { "epoch": 9.74, "learning_rate": 4.54030215478074e-08, "loss": 0.0037, "step": 426 },
    { "epoch": 9.76, "learning_rate": 2.5540045218819253e-08, "loss": 0.0024, "step": 427 },
    { "epoch": 9.78, "learning_rate": 1.1351399658321438e-08, "loss": 0.0027, "step": 428 },
    { "epoch": 9.81, "learning_rate": 2.8378901826831005e-09, "loss": 0.0033, "step": 429 },
    { "epoch": 9.83, "learning_rate": 0.0, "loss": 0.0051, "step": 430 },
    {
      "epoch": 9.83,
      "step": 430,
      "total_flos": 2.686707530150707e+16,
      "train_loss": 0.1627144819580365,
      "train_runtime": 335.6804,
      "train_samples_per_second": 20.794,
      "train_steps_per_second": 1.281
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 430,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 50000,
  "total_flos": 2.686707530150707e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}