diff --git "a/checkpoint-1342/trainer_state.json" "b/checkpoint-1342/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1342/trainer_state.json" @@ -0,0 +1,8137 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9806259314456036, + "eval_steps": 168, + "global_step": 1342, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 1.3745, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 1.6296857595443726, + "eval_runtime": 2.6662, + "eval_samples_per_second": 409.572, + "eval_steps_per_second": 25.88, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.42, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 1.3057, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 1.2307, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 1.289, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 6e-05, + "loss": 1.4111, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 7e-05, + "loss": 1.3089, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 1.3204, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 9e-05, + "loss": 1.3575, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 1.3279, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011000000000000002, + "loss": 1.3149, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012, + "loss": 1.2578, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013000000000000002, + "loss": 1.2849, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 1.2971, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015000000000000001, + "loss": 1.1473, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 1.1943, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017, + "loss": 1.1877, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018, + "loss": 1.1984, + "step": 18 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019, + "loss": 1.2647, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002, + "loss": 1.217, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999993046535236, + "loss": 1.0274, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999972186150606, + "loss": 1.2122, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999937418875124, + "loss": 1.1868, + "step": 23 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999888744757143, + "loss": 1.2345, + "step": 24 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999826163864348, + "loss": 1.2127, + "step": 25 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999749676283775, + "loss": 1.2114, + "step": 26 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999659282121792, + "loss": 1.2224, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955498150411, + "loss": 1.1517, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999943677457578, + "loss": 1.1631, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999930466150119, + "loss": 1.0465, + "step": 30 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999915864246407, + "loss": 1.1847, + "step": 31 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999899871766749, + "loss": 1.1238, + "step": 32 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999882488733385, + "loss": 1.1491, + "step": 33 + }, + { + "epoch": 0.05, + "learning_rate": 0.000199986371517049, + "loss": 1.276, + "step": 34 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999843551104172, + "loss": 1.0911, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019998219965624734, + "loss": 1.1276, + "step": 36 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997990515753693, + "loss": 1.0981, + "step": 37 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997747161747695, + "loss": 1.0901, + "step": 38 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999748990394517, + "loss": 1.096, + "step": 39 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997218742703887, + "loss": 1.122, + "step": 40 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996933678400946, + "loss": 1.1132, + "step": 41 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996634711432786, + "loss": 1.1498, + "step": 42 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996321842215173, + "loss": 1.0708, + "step": 43 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999599507118322, + "loss": 1.1154, + "step": 44 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995654398791355, + "loss": 1.2118, + "step": 45 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995299825513357, + "loss": 1.0919, + "step": 46 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994931351842327, + "loss": 1.1364, + "step": 47 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994548978290695, + "loss": 1.1442, + "step": 48 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999415270539023, + "loss": 1.1248, + "step": 49 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019993742533692022, + "loss": 1.1366, + "step": 50 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019993318463766495, + "loss": 1.1437, + "step": 51 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199928804962034, + "loss": 1.1191, + "step": 52 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999242863161182, + "loss": 1.0786, + "step": 53 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019991962870620153, + "loss": 1.1951, + "step": 54 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019991483213876134, + "loss": 1.1321, + "step": 55 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019990989662046818, + "loss": 1.0876, + "step": 56 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999048221581858, + "loss": 1.1794, + "step": 57 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989960875897126, + "loss": 1.1796, + "step": 58 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989425643007476, + "loss": 1.1165, + "step": 59 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998887651789398, + "loss": 1.1978, + "step": 60 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019988313501320297, + "loss": 1.1693, + "step": 61 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019987736594069414, + "loss": 1.1553, + "step": 62 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998714579694363, + "loss": 1.1959, + "step": 63 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019986541110764565, + "loss": 1.1945, + "step": 64 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985922536373146, + "loss": 1.121, + "step": 65 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985290074629627, + "loss": 1.122, + "step": 66 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019984643726413565, + "loss": 1.1435, + "step": 67 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019983983492623833, + "loss": 1.0413, + "step": 68 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998330937417861, + "loss": 1.078, + "step": 69 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998262137201539, + "loss": 1.0811, + "step": 70 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981919487090972, + "loss": 1.1639, + "step": 71 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981203720381463, + "loss": 1.164, + "step": 72 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019980474072882277, + "loss": 1.1006, + "step": 73 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019979730545608126, + "loss": 1.1926, + "step": 74 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001997897313959303, + "loss": 1.1129, + "step": 75 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019978201855890308, + "loss": 1.1367, + "step": 76 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019977416695572578, + "loss": 1.1495, + "step": 77 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997661765973176, + "loss": 1.1567, + "step": 78 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019975804749479062, + "loss": 1.2102, + "step": 79 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019974977965945, + "loss": 1.1175, + "step": 80 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997413731027937, + "loss": 1.1243, + "step": 81 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019973282783651263, + "loss": 1.1406, + "step": 82 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019972414387249072, + "loss": 1.09, + "step": 83 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019971532122280464, + "loss": 1.0115, + "step": 84 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019970635989972402, + "loss": 1.0328, + "step": 85 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019969725991571128, + "loss": 1.1226, + "step": 86 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019968802128342172, + "loss": 1.0747, + "step": 87 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019967864401570343, + "loss": 1.119, + "step": 88 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019966912812559732, + "loss": 1.1125, + "step": 89 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019965947362633708, + "loss": 1.0734, + "step": 90 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996496805313491, + "loss": 1.1798, + "step": 91 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019963974885425266, + "loss": 1.1461, + "step": 92 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996296786088596, + "loss": 1.0397, + "step": 93 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019961946980917456, + "loss": 1.17, + "step": 94 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019960912246939485, + "loss": 1.0679, + "step": 95 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019959863660391045, + "loss": 1.0839, + "step": 96 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019958801222730394, + "loss": 1.0937, + "step": 97 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019957724935435063, + "loss": 1.1668, + "step": 98 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019956634800001832, + "loss": 1.0858, + "step": 99 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019955530817946748, + "loss": 1.0935, + "step": 100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019954412990805107, + "loss": 1.1046, + "step": 101 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019953281320131468, + "loss": 1.1319, + "step": 102 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019952135807499633, + "loss": 1.1108, + "step": 103 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001995097645450266, + "loss": 1.0485, + "step": 104 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019949803262752855, + "loss": 1.0862, + "step": 105 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019948616233881768, + "loss": 1.268, + "step": 106 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019947415369540189, + "loss": 1.0926, + "step": 107 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001994620067139815, + "loss": 1.1427, + "step": 108 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019944972141144928, + "loss": 1.0754, + "step": 109 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019943729780489027, + "loss": 1.0044, + "step": 110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001994247359115819, + "loss": 1.1304, + "step": 111 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019941203574899393, + "loss": 1.1683, + "step": 112 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019939919733478838, + "loss": 1.1559, + "step": 113 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019938622068681953, + "loss": 1.1879, + "step": 114 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019937310582313392, + "loss": 1.0613, + "step": 115 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993598527619703, + "loss": 1.1196, + "step": 116 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993464615217596, + "loss": 1.0762, + "step": 117 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019933293212112495, + "loss": 1.1059, + "step": 118 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019931926457888156, + "loss": 1.0831, + "step": 119 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019930545891403678, + "loss": 1.0552, + "step": 120 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019929151514579008, + "loss": 1.15, + "step": 121 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019927743329353295, + "loss": 1.1038, + "step": 122 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992632133768489, + "loss": 1.067, + "step": 123 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992488554155135, + "loss": 1.1311, + "step": 124 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019923435942949426, + "loss": 1.1402, + "step": 125 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019921972543895066, + "loss": 1.0453, + "step": 126 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019920495346423402, + "loss": 1.1567, + "step": 127 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019919004352588767, + "loss": 1.137, + "step": 128 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001991749956446468, + "loss": 0.9986, + "step": 129 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019915980984143832, + "loss": 1.083, + "step": 130 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019914448613738106, + "loss": 1.0619, + "step": 131 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019912902455378556, + "loss": 1.1294, + "step": 132 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019911342511215414, + "loss": 1.0965, + "step": 133 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019909768783418086, + "loss": 1.0216, + "step": 134 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019908181274175138, + "loss": 1.0081, + "step": 135 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990657998569432, + "loss": 1.0246, + "step": 136 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990496492020252, + "loss": 1.1249, + "step": 137 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019903336079945804, + "loss": 1.0518, + "step": 138 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019901693467189386, + "loss": 1.189, + "step": 139 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019900037084217637, + "loss": 1.1475, + "step": 140 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989836693333408, + "loss": 1.2259, + "step": 141 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989668301686138, + "loss": 1.0399, + "step": 142 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989498533714135, + "loss": 1.128, + "step": 143 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019893273896534936, + "loss": 1.014, + "step": 144 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001989154869742223, + "loss": 1.1552, + "step": 145 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019889809742202455, + "loss": 1.1159, + "step": 146 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988805703329396, + "loss": 1.0218, + "step": 147 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019886290573134228, + "loss": 1.1723, + "step": 148 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988451036417986, + "loss": 1.2132, + "step": 149 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019882716408906585, + "loss": 1.112, + "step": 150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001988090870980924, + "loss": 1.0856, + "step": 151 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001987908726940178, + "loss": 1.0951, + "step": 152 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019877252090217271, + "loss": 1.0218, + "step": 153 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019875403174807882, + "loss": 1.0552, + "step": 154 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019873540525744887, + "loss": 1.1481, + "step": 155 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019871664145618657, + "loss": 1.169, + "step": 156 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019869774037038665, + "loss": 1.0802, + "step": 157 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986787020263347, + "loss": 1.0871, + "step": 158 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986595264505072, + "loss": 1.1022, + "step": 159 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019864021366957147, + "loss": 1.0257, + "step": 160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986207637103857, + "loss": 1.0986, + "step": 161 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019860117659999878, + "loss": 1.0837, + "step": 162 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019858145236565037, + "loss": 1.1895, + "step": 163 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019856159103477086, + "loss": 1.052, + "step": 164 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019854159263498123, + "loss": 1.1184, + "step": 165 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001985214571940931, + "loss": 1.0895, + "step": 166 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019850118474010872, + "loss": 1.0764, + "step": 167 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019848077530122083, + "loss": 1.1387, + "step": 168 + }, + { + "epoch": 0.25, + "eval_loss": 1.084919810295105, + "eval_runtime": 2.6029, + "eval_samples_per_second": 419.538, + "eval_steps_per_second": 26.509, + "step": 168 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019846022890581267, + "loss": 1.0826, + "step": 169 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198439545582458, + "loss": 1.1366, + "step": 170 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198418725359921, + "loss": 1.1349, + "step": 171 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019839776826715614, + "loss": 1.0636, + "step": 172 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019837667433330838, + "loss": 1.1216, + "step": 173 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001983554435877128, + "loss": 1.1051, + "step": 174 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019833407605989494, + "loss": 1.1558, + "step": 175 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019831257177957044, + "loss": 1.0364, + "step": 176 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019829093077664513, + "loss": 1.0665, + "step": 177 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019826915308121504, + "loss": 1.1994, + "step": 178 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982472387235662, + "loss": 1.1434, + "step": 179 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982251877341748, + "loss": 1.081, + "step": 180 + }, + { + "epoch": 0.27, + "learning_rate": 0.000198203000143707, + "loss": 1.0653, + "step": 181 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981806759830189, + "loss": 1.0269, + "step": 182 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981582152831566, + "loss": 1.1167, + "step": 183 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019813561807535598, + "loss": 1.0608, + "step": 184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001981128843910428, + "loss": 1.0989, + "step": 185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980900142618327, + "loss": 1.1405, + "step": 186 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019806700771953097, + "loss": 1.0359, + "step": 187 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980438647961327, + "loss": 1.1073, + "step": 188 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980205855238225, + "loss": 1.0338, + "step": 189 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019799716993497475, + "loss": 1.1285, + "step": 190 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019797361806215332, + "loss": 1.1277, + "step": 191 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019794992993811165, + "loss": 1.119, + "step": 192 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019792610559579265, + "loss": 1.1224, + "step": 193 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019790214506832868, + "loss": 1.1438, + "step": 194 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001978780483890414, + "loss": 1.1462, + "step": 195 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019785381559144196, + "loss": 1.042, + "step": 196 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019782944670923076, + "loss": 1.1022, + "step": 197 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019780494177629735, + "loss": 1.0564, + "step": 198 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019778030082672068, + "loss": 1.0471, + "step": 199 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019775552389476864, + "loss": 1.0636, + "step": 200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001977306110148984, + "loss": 1.0917, + "step": 201 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019770556222175608, + "loss": 1.1965, + "step": 202 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019768037755017685, + "loss": 1.073, + "step": 203 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019765505703518496, + "loss": 1.0636, + "step": 204 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019762960071199333, + "loss": 1.087, + "step": 205 + }, + { + "epoch": 0.31, + "learning_rate": 0.000197604008616004, + "loss": 1.0569, + "step": 206 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019757828078280766, + "loss": 1.08, + "step": 207 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019755241724818387, + "loss": 1.1536, + "step": 208 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019752641804810084, + "loss": 1.1514, + "step": 209 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019750028321871546, + "loss": 1.0691, + "step": 210 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019747401279637325, + "loss": 1.1289, + "step": 211 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019744760681760832, + "loss": 1.0834, + "step": 212 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019742106531914328, + "loss": 1.0762, + "step": 213 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001973943883378892, + "loss": 1.0913, + "step": 214 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019736757591094558, + "loss": 1.132, + "step": 215 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019734062807560027, + "loss": 1.0894, + "step": 216 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019731354486932944, + "loss": 1.0327, + "step": 217 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019728632632979746, + "loss": 1.112, + "step": 218 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019725897249485704, + "loss": 1.0718, + "step": 219 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019723148340254892, + "loss": 1.077, + "step": 220 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019720385909110198, + "loss": 1.0335, + "step": 221 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019717609959893318, + "loss": 1.0483, + "step": 222 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019714820496464746, + "loss": 1.0901, + "step": 223 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019712017522703764, + "loss": 0.9921, + "step": 224 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019709201042508455, + "loss": 1.0829, + "step": 225 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970637105979567, + "loss": 1.0705, + "step": 226 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970352757850105, + "loss": 1.0481, + "step": 227 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019700670602579008, + "loss": 0.9846, + "step": 228 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001969780013600272, + "loss": 1.1492, + "step": 229 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019694916182764113, + "loss": 1.1745, + "step": 230 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019692018746873892, + "loss": 1.0451, + "step": 231 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019689107832361496, + "loss": 1.1217, + "step": 232 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019686183443275116, + "loss": 1.0788, + "step": 233 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019683245583681675, + "loss": 1.0703, + "step": 234 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019680294257666837, + "loss": 1.1521, + "step": 235 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967732946933499, + "loss": 1.0659, + "step": 236 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019674351222809242, + "loss": 1.0321, + "step": 237 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967135952223142, + "loss": 1.0555, + "step": 238 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019668354371762066, + "loss": 1.0648, + "step": 239 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019665335775580415, + "loss": 1.0723, + "step": 240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001966230373788441, + "loss": 1.0264, + "step": 241 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019659258262890683, + "loss": 1.0331, + "step": 242 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019656199354834558, + "loss": 1.1514, + "step": 243 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019653127017970034, + "loss": 1.069, + "step": 244 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019650041256569792, + "loss": 0.9623, + "step": 245 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019646942074925172, + "loss": 1.0021, + "step": 246 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019643829477346188, + "loss": 1.1131, + "step": 247 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001964070346816151, + "loss": 1.1426, + "step": 248 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001963756405171845, + "loss": 1.0761, + "step": 249 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019634411232382978, + "loss": 1.1112, + "step": 250 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019631245014539698, + "loss": 1.081, + "step": 251 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019628065402591845, + "loss": 1.1446, + "step": 252 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019624872400961284, + "loss": 1.045, + "step": 253 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019621666014088494, + "loss": 1.0337, + "step": 254 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019618446246432583, + "loss": 1.1764, + "step": 255 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019615213102471257, + "loss": 1.0323, + "step": 256 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019611966586700823, + "loss": 1.0073, + "step": 257 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019608706703636188, + "loss": 1.1615, + "step": 258 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019605433457810855, + "loss": 1.1209, + "step": 259 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019602146853776894, + "loss": 1.0721, + "step": 260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001959884689610497, + "loss": 1.0967, + "step": 261 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019595533589384308, + "loss": 1.0284, + "step": 262 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019592206938222703, + "loss": 1.0148, + "step": 263 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019588866947246498, + "loss": 1.1434, + "step": 264 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019585513621100603, + "loss": 1.1125, + "step": 265 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001958214696444846, + "loss": 1.0812, + "step": 266 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019578766981972058, + "loss": 1.0611, + "step": 267 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019575373678371909, + "loss": 1.1029, + "step": 268 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019571967058367064, + "loss": 1.0692, + "step": 269 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019568547126695083, + "loss": 1.0581, + "step": 270 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019565113888112036, + "loss": 0.9841, + "step": 271 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019561667347392508, + "loss": 1.0173, + "step": 272 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019558207509329584, + "loss": 1.0805, + "step": 273 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019554734378734824, + "loss": 1.088, + "step": 274 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019551247960438296, + "loss": 1.0481, + "step": 275 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019547748259288536, + "loss": 1.1747, + "step": 276 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001954423528015255, + "loss": 1.0407, + "step": 277 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019540709027915818, + "loss": 1.1412, + "step": 278 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953716950748227, + "loss": 1.075, + "step": 279 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019533616723774294, + "loss": 0.9863, + "step": 280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953005068173272, + "loss": 1.1426, + "step": 281 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001952647138631682, + "loss": 1.0621, + "step": 282 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019522878842504295, + "loss": 1.1007, + "step": 283 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019519273055291266, + "loss": 1.0632, + "step": 284 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019515654029692278, + "loss": 1.126, + "step": 285 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019512021770740288, + "loss": 1.0946, + "step": 286 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001950837628348665, + "loss": 1.0639, + "step": 287 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019504717573001117, + "loss": 1.1432, + "step": 288 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019501045644371832, + "loss": 1.0619, + "step": 289 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001949736050270532, + "loss": 1.0597, + "step": 290 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019493662153126481, + "loss": 1.0743, + "step": 291 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001948995060077859, + "loss": 1.1114, + "step": 292 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019486225850823266, + "loss": 1.1435, + "step": 293 + }, + { + "epoch": 0.44, + "learning_rate": 0.000194824879084405, + "loss": 1.1396, + "step": 294 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019478736778828624, + "loss": 1.1597, + "step": 295 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019474972467204297, + "loss": 1.0976, + "step": 296 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019471194978802533, + "loss": 1.0829, + "step": 297 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001946740431887665, + "loss": 1.0437, + "step": 298 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019463600492698296, + "loss": 1.0835, + "step": 299 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019459783505557424, + "loss": 1.0558, + "step": 300 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001945595336276229, + "loss": 1.0656, + "step": 301 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019452110069639452, + "loss": 1.1487, + "step": 302 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019448253631533744, + "loss": 1.1383, + "step": 303 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019444384053808288, + "loss": 1.1582, + "step": 304 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019440501341844483, + "loss": 0.9999, + "step": 305 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019436605501041987, + "loss": 1.1317, + "step": 306 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019432696536818717, + "loss": 1.0944, + "step": 307 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019428774454610843, + "loss": 1.1624, + "step": 308 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019424839259872778, + "loss": 1.1644, + "step": 309 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019420890958077167, + "loss": 1.0486, + "step": 310 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019416929554714888, + "loss": 1.0705, + "step": 311 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019412955055295034, + "loss": 1.023, + "step": 312 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019408967465344917, + "loss": 1.1144, + "step": 313 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019404966790410047, + "loss": 1.0378, + "step": 314 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019400953036054138, + "loss": 1.036, + "step": 315 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019396926207859084, + "loss": 1.0735, + "step": 316 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019392886311424973, + "loss": 1.0259, + "step": 317 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001938883335237006, + "loss": 1.1603, + "step": 318 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938476733633076, + "loss": 1.1282, + "step": 319 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938068826896166, + "loss": 1.063, + "step": 320 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019376596155935486, + "loss": 1.1176, + "step": 321 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019372491002943112, + "loss": 1.1307, + "step": 322 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019368372815693549, + "loss": 1.0412, + "step": 323 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019364241599913924, + "loss": 1.1353, + "step": 324 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019360097361349494, + "loss": 1.1293, + "step": 325 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001935594010576362, + "loss": 1.0885, + "step": 326 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019351769838937775, + "loss": 1.0944, + "step": 327 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019347586566671512, + "loss": 1.1435, + "step": 328 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001934339029478248, + "loss": 1.1217, + "step": 329 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019339181029106404, + "loss": 1.1801, + "step": 330 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019334958775497083, + "loss": 1.1846, + "step": 331 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019330723539826375, + "loss": 1.0897, + "step": 332 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019326475327984192, + "loss": 1.0643, + "step": 333 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019322214145878487, + "loss": 1.0246, + "step": 334 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001931793999943526, + "loss": 1.1108, + "step": 335 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019313652894598543, + "loss": 1.0619, + "step": 336 + }, + { + "epoch": 0.5, + "eval_loss": 1.048388123512268, + "eval_runtime": 2.6045, + "eval_samples_per_second": 419.273, + "eval_steps_per_second": 26.493, + "step": 336 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019309352837330372, + "loss": 1.0014, + "step": 337 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001930503983361081, + "loss": 1.0786, + "step": 338 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019300713889437926, + "loss": 1.014, + "step": 339 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019296375010827773, + "loss": 1.1233, + "step": 340 + }, + { + "epoch": 0.51, + "learning_rate": 0.000192920232038144, + "loss": 1.1052, + "step": 341 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001928765847444984, + "loss": 1.0138, + "step": 342 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019283280828804081, + "loss": 1.1536, + "step": 343 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019278890272965096, + "loss": 0.992, + "step": 344 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001927448681303879, + "loss": 1.1165, + "step": 345 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001927007045514903, + "loss": 1.0565, + "step": 346 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019265641205437611, + "loss": 1.0664, + "step": 347 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001926119907006426, + "loss": 1.0625, + "step": 348 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019256744055206622, + "loss": 1.0393, + "step": 349 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001925227616706026, + "loss": 1.125, + "step": 350 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019247795411838627, + "loss": 1.0375, + "step": 351 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019243301795773086, + "loss": 1.0648, + "step": 352 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001923879532511287, + "loss": 1.0903, + "step": 353 + }, + { + "epoch": 0.53, + "learning_rate": 0.000192342760061251, + "loss": 1.1219, + "step": 354 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019229743845094755, + "loss": 1.054, + "step": 355 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001922519884832469, + "loss": 1.1206, + "step": 356 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019220641022135588, + "loss": 1.1125, + "step": 357 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019216070372865996, + "loss": 1.064, + "step": 358 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001921148690687228, + "loss": 1.0843, + "step": 359 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019206890630528634, + "loss": 1.1378, + "step": 360 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019202281550227064, + "loss": 1.0399, + "step": 361 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919765967237739, + "loss": 1.1762, + "step": 362 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919302500340722, + "loss": 1.0538, + "step": 363 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019188377549761963, + "loss": 1.0343, + "step": 364 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001918371731790479, + "loss": 1.1027, + "step": 365 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019179044314316664, + "loss": 1.036, + "step": 366 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019174358545496288, + "loss": 1.041, + "step": 367 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019169660017960137, + "loss": 1.0762, + "step": 368 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019164948738242409, + "loss": 1.0807, + "step": 369 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019160224712895055, + "loss": 1.037, + "step": 370 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019155487948487748, + "loss": 1.0625, + "step": 371 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001915073845160786, + "loss": 1.062, + "step": 372 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019145976228860496, + "loss": 1.1882, + "step": 373 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019141201286868435, + "loss": 1.1338, + "step": 374 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019136413632272163, + "loss": 1.0174, + "step": 375 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019131613271729833, + "loss": 1.0585, + "step": 376 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019126800211917276, + "loss": 1.0495, + "step": 377 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001912197445952798, + "loss": 1.123, + "step": 378 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019117136021273075, + "loss": 1.0517, + "step": 379 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001911228490388136, + "loss": 1.0545, + "step": 380 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019107421114099237, + "loss": 1.0302, + "step": 381 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019102544658690748, + "loss": 1.0908, + "step": 382 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019097655544437545, + "loss": 1.1425, + "step": 383 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019092753778138886, + "loss": 1.0686, + "step": 384 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001908783936661162, + "loss": 1.06, + "step": 385 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001908291231669019, + "loss": 1.1296, + "step": 386 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019077972635226604, + "loss": 1.1029, + "step": 387 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019073020329090444, + "loss": 1.0469, + "step": 388 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001906805540516885, + "loss": 1.0427, + "step": 389 + }, + { + "epoch": 0.58, + "learning_rate": 0.000190630778703665, + "loss": 1.0075, + "step": 390 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019058087731605624, + "loss": 1.1146, + "step": 391 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001905308499582597, + "loss": 1.1161, + "step": 392 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019048069669984802, + "loss": 1.1419, + "step": 393 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019043041761056907, + "loss": 1.1586, + "step": 394 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019038001276034557, + "loss": 1.0765, + "step": 395 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019032948221927524, + "loss": 1.1225, + "step": 396 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001902788260576305, + "loss": 1.0247, + "step": 397 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019022804434585852, + "loss": 1.135, + "step": 398 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001901771371545811, + "loss": 1.1122, + "step": 399 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019012610455459446, + "loss": 1.075, + "step": 400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019007494661686935, + "loss": 1.1121, + "step": 401 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001900236634125507, + "loss": 1.0531, + "step": 402 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018997225501295772, + "loss": 1.0561, + "step": 403 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018992072148958368, + "loss": 1.0803, + "step": 404 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018986906291409595, + "loss": 1.0579, + "step": 405 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018981727935833567, + "loss": 1.0614, + "step": 406 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001897653708943179, + "loss": 0.9982, + "step": 407 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018971333759423142, + "loss": 1.1498, + "step": 408 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018966117953043852, + "loss": 1.1165, + "step": 409 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018960889677547505, + "loss": 1.1155, + "step": 410 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018955648940205028, + "loss": 1.0017, + "step": 411 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018950395748304678, + "loss": 1.0556, + "step": 412 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018945130109152033, + "loss": 1.0248, + "step": 413 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018939852030069981, + "loss": 1.0155, + "step": 414 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018934561518398706, + "loss": 1.0248, + "step": 415 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018929258581495685, + "loss": 0.9835, + "step": 416 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001892394322673568, + "loss": 1.1602, + "step": 417 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001891861546151071, + "loss": 1.021, + "step": 418 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018913275293230069, + "loss": 1.0526, + "step": 419 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018907922729320285, + "loss": 1.0585, + "step": 420 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018902557777225135, + "loss": 1.0327, + "step": 421 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018897180444405614, + "loss": 1.0448, + "step": 422 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001889179073833995, + "loss": 1.0776, + "step": 423 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888638866652356, + "loss": 1.0748, + "step": 424 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888097423646907, + "loss": 1.0482, + "step": 425 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018875547455706295, + "loss": 1.0394, + "step": 426 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018870108331782217, + "loss": 1.0646, + "step": 427 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018864656872260985, + "loss": 1.0338, + "step": 428 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018859193084723913, + "loss": 0.9848, + "step": 429 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001885371697676944, + "loss": 1.0587, + "step": 430 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001884822855601316, + "loss": 1.0711, + "step": 431 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018842727830087778, + "loss": 1.0964, + "step": 432 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018837214806643115, + "loss": 1.0254, + "step": 433 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018831689493346095, + "loss": 1.0748, + "step": 434 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018826151897880728, + "loss": 1.0797, + "step": 435 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018820602027948114, + "loss": 1.1068, + "step": 436 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018815039891266418, + "loss": 1.081, + "step": 437 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001880946549557086, + "loss": 1.0685, + "step": 438 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018803878848613716, + "loss": 1.0916, + "step": 439 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018798279958164295, + "loss": 1.115, + "step": 440 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018792668832008936, + "loss": 1.0048, + "step": 441 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001878704547795099, + "loss": 1.0386, + "step": 442 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018781409903810821, + "loss": 1.0283, + "step": 443 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018775762117425777, + "loss": 1.085, + "step": 444 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018770102126650198, + "loss": 1.0582, + "step": 445 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018764429939355392, + "loss": 1.0705, + "step": 446 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001875874556342963, + "loss": 1.1426, + "step": 447 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018753049006778132, + "loss": 1.0337, + "step": 448 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001874734027732306, + "loss": 1.0993, + "step": 449 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018741619383003507, + "loss": 1.0661, + "step": 450 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018735886331775476, + "loss": 1.0564, + "step": 451 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018730141131611882, + "loss": 1.0989, + "step": 452 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001872438379050254, + "loss": 1.0984, + "step": 453 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018718614316454133, + "loss": 1.1173, + "step": 454 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018712832717490235, + "loss": 1.1005, + "step": 455 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018707039001651277, + "loss": 1.0008, + "step": 456 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018701233176994533, + "loss": 1.0701, + "step": 457 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018695415251594123, + "loss": 1.0831, + "step": 458 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018689585233541003, + "loss": 1.1165, + "step": 459 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018683743130942928, + "loss": 1.0884, + "step": 460 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018677888951924474, + "loss": 0.9882, + "step": 461 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018672022704627002, + "loss": 1.086, + "step": 462 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018666144397208668, + "loss": 1.0545, + "step": 463 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018660254037844388, + "loss": 1.0274, + "step": 464 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001865435163472584, + "loss": 1.0795, + "step": 465 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018648437196061462, + "loss": 1.022, + "step": 466 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001864251073007642, + "loss": 1.0717, + "step": 467 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018636572245012606, + "loss": 1.1501, + "step": 468 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001863062174912863, + "loss": 1.1034, + "step": 469 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018624659250699805, + "loss": 1.0784, + "step": 470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018618684758018136, + "loss": 1.1274, + "step": 471 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001861269827939231, + "loss": 1.0643, + "step": 472 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018606699823147676, + "loss": 1.1394, + "step": 473 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018600689397626246, + "loss": 0.9665, + "step": 474 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018594667011186678, + "loss": 1.058, + "step": 475 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018588632672204264, + "loss": 1.0706, + "step": 476 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001858258638907091, + "loss": 1.0414, + "step": 477 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018576528170195146, + "loss": 1.1, + "step": 478 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018570458024002093, + "loss": 1.1114, + "step": 479 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018564375958933459, + "loss": 1.0596, + "step": 480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001855828198344753, + "loss": 1.0897, + "step": 481 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018552176106019155, + "loss": 1.0316, + "step": 482 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018546058335139733, + "loss": 1.0516, + "step": 483 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001853992867931721, + "loss": 1.0477, + "step": 484 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018533787147076048, + "loss": 1.0432, + "step": 485 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018527633746957234, + "loss": 1.0568, + "step": 486 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018521468487518264, + "loss": 1.114, + "step": 487 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018515291377333112, + "loss": 1.0664, + "step": 488 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850910242499225, + "loss": 1.0162, + "step": 489 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850290163910261, + "loss": 1.0829, + "step": 490 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018496689028287572, + "loss": 1.1078, + "step": 491 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001849046460118698, + "loss": 1.0533, + "step": 492 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018484228366457095, + "loss": 1.0923, + "step": 493 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018477980332770607, + "loss": 1.0516, + "step": 494 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018471720508816614, + "loss": 0.9826, + "step": 495 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018465448903300606, + "loss": 1.1581, + "step": 496 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001845916552494446, + "loss": 1.1268, + "step": 497 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018452870382486432, + "loss": 1.0483, + "step": 498 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018446563484681127, + "loss": 1.1792, + "step": 499 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018440244840299506, + "loss": 1.0918, + "step": 500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001843391445812886, + "loss": 0.9691, + "step": 501 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018427572346972805, + "loss": 1.0581, + "step": 502 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001842121851565128, + "loss": 1.0072, + "step": 503 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018414852973000503, + "loss": 0.9686, + "step": 504 + }, + { + "epoch": 0.75, + "eval_loss": 1.0276715755462646, + "eval_runtime": 2.6054, + "eval_samples_per_second": 419.124, + "eval_steps_per_second": 26.483, + "step": 504 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018408475727872995, + "loss": 1.1221, + "step": 505 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018402086789137546, + "loss": 1.087, + "step": 506 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018395686165679202, + "loss": 1.0599, + "step": 507 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018389273866399275, + "loss": 1.1844, + "step": 508 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018382849900215294, + "loss": 1.046, + "step": 509 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018376414276061032, + "loss": 0.9691, + "step": 510 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018369967002886464, + "loss": 1.0996, + "step": 511 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001836350808965776, + "loss": 1.083, + "step": 512 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018357037545357297, + "loss": 1.0371, + "step": 513 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018350555378983608, + "loss": 1.018, + "step": 514 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018344061599551398, + "loss": 1.095, + "step": 515 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018337556216091517, + "loss": 1.0871, + "step": 516 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001833103923765096, + "loss": 1.0774, + "step": 517 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018324510673292842, + "loss": 1.0337, + "step": 518 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001831797053209639, + "loss": 1.0059, + "step": 519 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018311418823156936, + "loss": 1.0744, + "step": 520 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018304855555585894, + "loss": 0.9732, + "step": 521 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018298280738510752, + "loss": 1.1176, + "step": 522 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018291694381075056, + "loss": 1.1485, + "step": 523 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018285096492438424, + "loss": 1.1044, + "step": 524 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018278487081776476, + "loss": 0.9812, + "step": 525 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018271866158280884, + "loss": 1.0966, + "step": 526 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001826523373115931, + "loss": 1.2406, + "step": 527 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001825858980963543, + "loss": 1.0727, + "step": 528 + }, + { + "epoch": 0.79, + "learning_rate": 0.000182519344029489, + "loss": 0.9966, + "step": 529 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018245267520355346, + "loss": 1.081, + "step": 530 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018238589171126353, + "loss": 1.1104, + "step": 531 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018231899364549455, + "loss": 1.0535, + "step": 532 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018225198109928114, + "loss": 1.0801, + "step": 533 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018218485416581726, + "loss": 1.0726, + "step": 534 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018211761293845585, + "loss": 1.0923, + "step": 535 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018205025751070875, + "loss": 1.0551, + "step": 536 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018198278797624675, + "loss": 1.0495, + "step": 537 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001819152044288992, + "loss": 1.0589, + "step": 538 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018184750696265408, + "loss": 1.0487, + "step": 539 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001817796956716578, + "loss": 1.0491, + "step": 540 + }, + { + "epoch": 0.81, + "learning_rate": 0.000181711770650215, + "loss": 1.0981, + "step": 541 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018164373199278856, + "loss": 1.1706, + "step": 542 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001815755797939994, + "loss": 1.1024, + "step": 543 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018150731414862622, + "loss": 1.0488, + "step": 544 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018143893515160564, + "loss": 1.165, + "step": 545 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018137044289803181, + "loss": 1.0346, + "step": 546 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018130183748315645, + "loss": 1.1179, + "step": 547 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001812331190023886, + "loss": 1.0027, + "step": 548 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018116428755129459, + "loss": 1.1106, + "step": 549 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018109534322559783, + "loss": 1.0479, + "step": 550 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018102628612117865, + "loss": 1.0046, + "step": 551 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001809571163340744, + "loss": 0.9883, + "step": 552 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018088783396047893, + "loss": 1.1018, + "step": 553 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018081843909674276, + "loss": 1.1389, + "step": 554 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018074893183937283, + "loss": 1.0751, + "step": 555 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018067931228503246, + "loss": 1.1475, + "step": 556 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018060958053054096, + "loss": 1.0829, + "step": 557 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018053973667287387, + "loss": 1.0272, + "step": 558 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018046978080916252, + "loss": 1.0668, + "step": 559 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018039971303669407, + "loss": 1.0988, + "step": 560 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018032953345291123, + "loss": 1.0339, + "step": 561 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001802592421554123, + "loss": 1.0654, + "step": 562 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018018883924195085, + "loss": 1.0157, + "step": 563 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018011832481043576, + "loss": 1.0738, + "step": 564 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001800476989589309, + "loss": 1.0742, + "step": 565 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001799769617856552, + "loss": 0.9861, + "step": 566 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001799061133889823, + "loss": 1.0788, + "step": 567 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017983515386744061, + "loss": 1.0539, + "step": 568 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017976408331971298, + "loss": 1.0875, + "step": 569 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001796929018446368, + "loss": 1.0765, + "step": 570 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017962160954120354, + "loss": 1.1336, + "step": 571 + }, + { + "epoch": 0.85, + "learning_rate": 0.000179550206508559, + "loss": 0.9674, + "step": 572 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017947869284600282, + "loss": 1.0607, + "step": 573 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001794070686529886, + "loss": 0.9959, + "step": 574 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017933533402912354, + "loss": 1.038, + "step": 575 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001792634890741685, + "loss": 1.1342, + "step": 576 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017919153388803774, + "loss": 1.0941, + "step": 577 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017911946857079888, + "loss": 1.1286, + "step": 578 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017904729322267256, + "loss": 1.0354, + "step": 579 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001789750079440326, + "loss": 1.1314, + "step": 580 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017890261283540562, + "loss": 1.0365, + "step": 581 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017883010799747099, + "loss": 1.091, + "step": 582 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017875749353106062, + "loss": 0.9995, + "step": 583 + }, + { + "epoch": 0.87, + "learning_rate": 0.000178684769537159, + "loss": 1.0435, + "step": 584 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017861193611690287, + "loss": 1.0555, + "step": 585 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017853899337158112, + "loss": 1.0637, + "step": 586 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017846594140263474, + "loss": 1.064, + "step": 587 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017839278031165658, + "loss": 0.9879, + "step": 588 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017831951020039126, + "loss": 1.0846, + "step": 589 + }, + { + "epoch": 0.88, + "learning_rate": 0.000178246131170735, + "loss": 1.0373, + "step": 590 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017817264332473546, + "loss": 1.0377, + "step": 591 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017809904676459177, + "loss": 1.0932, + "step": 592 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017802534159265404, + "loss": 1.085, + "step": 593 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001779515279114236, + "loss": 1.0975, + "step": 594 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001778776058235526, + "loss": 1.1283, + "step": 595 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017780357543184397, + "loss": 1.0652, + "step": 596 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017772943683925122, + "loss": 1.0336, + "step": 597 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017765519014887842, + "loss": 0.9761, + "step": 598 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001775808354639799, + "loss": 1.0688, + "step": 599 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017750637288796016, + "loss": 1.1031, + "step": 600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017743180252437383, + "loss": 1.083, + "step": 601 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017735712447692538, + "loss": 1.1612, + "step": 602 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017728233884946903, + "loss": 1.1618, + "step": 603 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017720744574600863, + "loss": 1.144, + "step": 604 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001771324452706975, + "loss": 1.1174, + "step": 605 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017705733752783825, + "loss": 0.9728, + "step": 606 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001769821226218827, + "loss": 1.0599, + "step": 607 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001769068006574317, + "loss": 1.0639, + "step": 608 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017683137173923495, + "loss": 1.1278, + "step": 609 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017675583597219095, + "loss": 0.9925, + "step": 610 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766801934613467, + "loss": 1.0457, + "step": 611 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766044443118978, + "loss": 1.0348, + "step": 612 + }, + { + "epoch": 0.91, + "learning_rate": 0.000176528588629188, + "loss": 1.022, + "step": 613 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017645262651870926, + "loss": 1.0027, + "step": 614 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017637655808610156, + "loss": 1.0491, + "step": 615 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017630038343715275, + "loss": 1.0413, + "step": 616 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017622410267779834, + "loss": 1.0358, + "step": 617 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017614771591412148, + "loss": 1.1125, + "step": 618 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017607122325235267, + "loss": 1.1185, + "step": 619 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017599462479886974, + "loss": 1.0738, + "step": 620 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017591792066019765, + "loss": 1.102, + "step": 621 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017584111094300827, + "loss": 1.065, + "step": 622 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001757641957541203, + "loss": 1.0514, + "step": 623 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001756871752004992, + "loss": 1.0396, + "step": 624 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017561004938925688, + "loss": 1.1027, + "step": 625 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017553281842765169, + "loss": 1.0223, + "step": 626 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017545548242308816, + "loss": 1.1793, + "step": 627 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017537804148311695, + "loss": 1.0642, + "step": 628 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017530049571543464, + "loss": 1.0682, + "step": 629 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017522284522788353, + "loss": 1.0476, + "step": 630 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017514509012845164, + "loss": 1.1064, + "step": 631 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017506723052527242, + "loss": 1.0258, + "step": 632 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017498926652662476, + "loss": 1.1954, + "step": 633 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001749111982409325, + "loss": 1.0637, + "step": 634 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017483302577676475, + "loss": 0.9685, + "step": 635 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017475474924283536, + "loss": 1.0465, + "step": 636 + }, + { + "epoch": 0.95, + "learning_rate": 0.000174676368748003, + "loss": 1.0161, + "step": 637 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017459788440127083, + "loss": 1.0479, + "step": 638 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017451929631178648, + "loss": 1.1166, + "step": 639 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001744406045888419, + "loss": 1.0634, + "step": 640 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017436180934187308, + "loss": 1.0826, + "step": 641 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017428291068046, + "loss": 1.07, + "step": 642 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017420390871432647, + "loss": 1.1167, + "step": 643 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017412480355334005, + "loss": 1.0347, + "step": 644 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017404559530751162, + "loss": 1.0393, + "step": 645 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017396628408699555, + "loss": 1.1108, + "step": 646 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017388687000208946, + "loss": 1.006, + "step": 647 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001738073531632339, + "loss": 1.0932, + "step": 648 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001737277336810124, + "loss": 1.0123, + "step": 649 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017364801166615124, + "loss": 1.1273, + "step": 650 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001735681872295192, + "loss": 0.9893, + "step": 651 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001734882604821276, + "loss": 1.0699, + "step": 652 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017340823153513002, + "loss": 1.0901, + "step": 653 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017332810049982208, + "loss": 1.0212, + "step": 654 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017324786748764155, + "loss": 0.9898, + "step": 655 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017316753261016783, + "loss": 1.0899, + "step": 656 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017308709597912213, + "loss": 1.085, + "step": 657 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017300655770636708, + "loss": 1.091, + "step": 658 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017292591790390665, + "loss": 1.0502, + "step": 659 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001728451766838861, + "loss": 1.2131, + "step": 660 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017276433415859167, + "loss": 1.1256, + "step": 661 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017268339044045042, + "loss": 1.0577, + "step": 662 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017260234564203032, + "loss": 1.0012, + "step": 663 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017252119987603973, + "loss": 1.0611, + "step": 664 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017243995325532755, + "loss": 1.1251, + "step": 665 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017235860589288277, + "loss": 1.0959, + "step": 666 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001722771579018347, + "loss": 1.1413, + "step": 667 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017219560939545246, + "loss": 1.0728, + "step": 668 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017211396048714498, + "loss": 1.0461, + "step": 669 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001720322112904608, + "loss": 1.1084, + "step": 670 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017195036191908797, + "loss": 1.1316, + "step": 671 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017186841248685383, + "loss": 1.0816, + "step": 672 + }, + { + "epoch": 1.0, + "eval_loss": 1.0170178413391113, + "eval_runtime": 2.6119, + "eval_samples_per_second": 418.079, + "eval_steps_per_second": 26.417, + "step": 672 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001717863631077249, + "loss": 1.0711, + "step": 673 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017170421389580667, + "loss": 1.1245, + "step": 674 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017162196496534342, + "loss": 1.0519, + "step": 675 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001715396164307182, + "loss": 1.104, + "step": 676 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017145716840645254, + "loss": 1.1193, + "step": 677 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017137462100720631, + "loss": 1.1238, + "step": 678 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017129197434777763, + "loss": 1.004, + "step": 679 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017120922854310257, + "loss": 1.0426, + "step": 680 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017112638370825515, + "loss": 1.0308, + "step": 681 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017104343995844715, + "loss": 1.0892, + "step": 682 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017096039740902784, + "loss": 1.0115, + "step": 683 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017087725617548385, + "loss": 1.1011, + "step": 684 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017079401637343914, + "loss": 0.9829, + "step": 685 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017071067811865476, + "loss": 0.9738, + "step": 686 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001706272415270286, + "loss": 1.0563, + "step": 687 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017054370671459532, + "loss": 1.0153, + "step": 688 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001704600737975262, + "loss": 1.0638, + "step": 689 + }, + { + "epoch": 1.01, + "learning_rate": 0.000170376342892129, + "loss": 1.0053, + "step": 690 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017029251411484765, + "loss": 1.0178, + "step": 691 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017020858758226229, + "loss": 1.0755, + "step": 692 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017012456341108885, + "loss": 0.9365, + "step": 693 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017004044171817925, + "loss": 1.0666, + "step": 694 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016995622262052092, + "loss": 1.041, + "step": 695 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016987190623523674, + "loss": 1.0387, + "step": 696 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016978749267958495, + "loss": 0.9332, + "step": 697 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016970298207095885, + "loss": 1.0737, + "step": 698 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016961837452688676, + "loss": 0.992, + "step": 699 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016953367016503182, + "loss": 0.9997, + "step": 700 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016944886910319173, + "loss": 1.1054, + "step": 701 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016936397145929878, + "loss": 0.9876, + "step": 702 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016927897735141952, + "loss": 1.0158, + "step": 703 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016919388689775464, + "loss": 0.9771, + "step": 704 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016910870021663883, + "loss": 0.942, + "step": 705 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016902341742654065, + "loss": 1.0217, + "step": 706 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016893803864606222, + "loss": 1.0346, + "step": 707 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016885256399393924, + "loss": 0.9891, + "step": 708 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016876699358904068, + "loss": 0.9697, + "step": 709 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016868132755036875, + "loss": 1.0062, + "step": 710 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016859556599705856, + "loss": 0.9822, + "step": 711 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001685097090483781, + "loss": 1.0921, + "step": 712 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016842375682372805, + "loss": 1.0126, + "step": 713 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016833770944264153, + "loss": 1.0043, + "step": 714 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016825156702478407, + "loss": 0.952, + "step": 715 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016816532968995328, + "loss": 1.0423, + "step": 716 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016807899755807886, + "loss": 1.0465, + "step": 717 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016799257074922224, + "loss": 0.9827, + "step": 718 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016790604938357663, + "loss": 0.9798, + "step": 719 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016781943358146664, + "loss": 1.0268, + "step": 720 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016773272346334828, + "loss": 1.0007, + "step": 721 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001676459191498087, + "loss": 0.9989, + "step": 722 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016755902076156604, + "loss": 0.9374, + "step": 723 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016747202841946928, + "loss": 1.0031, + "step": 724 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016738494224449802, + "loss": 0.9751, + "step": 725 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016729776235776246, + "loss": 1.1055, + "step": 726 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016721048888050302, + "loss": 1.0527, + "step": 727 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001671231219340903, + "loss": 1.0048, + "step": 728 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001670356616400249, + "loss": 0.957, + "step": 729 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016694810811993723, + "loss": 1.0598, + "step": 730 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016686046149558736, + "loss": 1.02, + "step": 731 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016677272188886483, + "loss": 0.9973, + "step": 732 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016668488942178856, + "loss": 1.0685, + "step": 733 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016659696421650645, + "loss": 0.9783, + "step": 734 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016650894639529544, + "loss": 0.9767, + "step": 735 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016642083608056141, + "loss": 1.0192, + "step": 736 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016633263339483866, + "loss": 1.0121, + "step": 737 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016624433846079012, + "loss": 0.9817, + "step": 738 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016615595140120686, + "loss": 1.1145, + "step": 739 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016606747233900815, + "loss": 0.9862, + "step": 740 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016597890139724125, + "loss": 1.0606, + "step": 741 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001658902386990811, + "loss": 1.0416, + "step": 742 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001658014843678303, + "loss": 0.9971, + "step": 743 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016571263852691888, + "loss": 1.0318, + "step": 744 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001656237012999041, + "loss": 1.0633, + "step": 745 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001655346728104704, + "loss": 1.0418, + "step": 746 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016544555318242897, + "loss": 0.9308, + "step": 747 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016535634253971794, + "loss": 1.1049, + "step": 748 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001652670410064019, + "loss": 0.9377, + "step": 749 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016517764870667182, + "loss": 0.9934, + "step": 750 + }, + { + "epoch": 1.1, + "learning_rate": 0.000165088165764845, + "loss": 1.0467, + "step": 751 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016499859230536466, + "loss": 1.0172, + "step": 752 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001649089284528001, + "loss": 0.9922, + "step": 753 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016481917433184607, + "loss": 1.0373, + "step": 754 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001647293300673231, + "loss": 1.0377, + "step": 755 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016463939578417692, + "loss": 0.9991, + "step": 756 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016454937160747854, + "loss": 1.0657, + "step": 757 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016445925766242391, + "loss": 0.9954, + "step": 758 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001643690540743339, + "loss": 1.018, + "step": 759 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016427876096865394, + "loss": 1.01, + "step": 760 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001641883784709541, + "loss": 0.9318, + "step": 761 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001640979067069286, + "loss": 1.0174, + "step": 762 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016400734580239594, + "loss": 1.0886, + "step": 763 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001639166958832985, + "loss": 1.0316, + "step": 764 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001638259570757025, + "loss": 1.0514, + "step": 765 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001637351295057978, + "loss": 0.9914, + "step": 766 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016364421329989755, + "loss": 1.0529, + "step": 767 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016355320858443842, + "loss": 0.9689, + "step": 768 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016346211548597995, + "loss": 1.0398, + "step": 769 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001633709341312046, + "loss": 1.0127, + "step": 770 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016327966464691778, + "loss": 1.1388, + "step": 771 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016318830716004722, + "loss": 0.9659, + "step": 772 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016309686179764317, + "loss": 0.9907, + "step": 773 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016300532868687806, + "loss": 0.9168, + "step": 774 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001629137079550463, + "loss": 1.06, + "step": 775 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016282199972956425, + "loss": 0.9826, + "step": 776 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016273020413796983, + "loss": 1.0496, + "step": 777 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001626383213079226, + "loss": 1.0245, + "step": 778 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016254635136720328, + "loss": 1.036, + "step": 779 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001624542944437139, + "loss": 1.0283, + "step": 780 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016236215066547734, + "loss": 1.0078, + "step": 781 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016226992016063723, + "loss": 0.9819, + "step": 782 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016217760305745803, + "loss": 1.0687, + "step": 783 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001620851994843244, + "loss": 1.0523, + "step": 784 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016199270956974128, + "loss": 1.0279, + "step": 785 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016190013344233388, + "loss": 1.0559, + "step": 786 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016180747123084705, + "loss": 1.0844, + "step": 787 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016171472306414554, + "loss": 1.0724, + "step": 788 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016162188907121354, + "loss": 0.9696, + "step": 789 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016152896938115464, + "loss": 0.9551, + "step": 790 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001614359641231916, + "loss": 1.0032, + "step": 791 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001613428734266662, + "loss": 1.1404, + "step": 792 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016124969742103897, + "loss": 1.0329, + "step": 793 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016115643623588915, + "loss": 1.039, + "step": 794 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001610630900009144, + "loss": 1.0231, + "step": 795 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001609696588459307, + "loss": 1.0659, + "step": 796 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016087614290087208, + "loss": 1.0029, + "step": 797 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001607825422957905, + "loss": 0.985, + "step": 798 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016068885716085567, + "loss": 0.9392, + "step": 799 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016059508762635482, + "loss": 1.006, + "step": 800 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016050123382269264, + "loss": 1.0748, + "step": 801 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001604072958803909, + "loss": 1.1378, + "step": 802 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016031327393008845, + "loss": 1.058, + "step": 803 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016021916810254097, + "loss": 0.9827, + "step": 804 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016012497852862075, + "loss": 0.9572, + "step": 805 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016003070533931657, + "loss": 1.0042, + "step": 806 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015993634866573347, + "loss": 0.9521, + "step": 807 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001598419086390927, + "loss": 0.9395, + "step": 808 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015974738539073125, + "loss": 1.0902, + "step": 809 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015965277905210195, + "loss": 1.0408, + "step": 810 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015955808975477319, + "loss": 1.0436, + "step": 811 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015946331763042867, + "loss": 1.0845, + "step": 812 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015936846281086736, + "loss": 1.0752, + "step": 813 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015927352542800317, + "loss": 1.0832, + "step": 814 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015917850561386488, + "loss": 0.9901, + "step": 815 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015908340350059583, + "loss": 1.0311, + "step": 816 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015898821922045385, + "loss": 0.9858, + "step": 817 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001588929529058111, + "loss": 0.9541, + "step": 818 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015879760468915372, + "loss": 0.9516, + "step": 819 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015870217470308188, + "loss": 1.0791, + "step": 820 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015860666308030932, + "loss": 0.9099, + "step": 821 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015851106995366337, + "loss": 1.0983, + "step": 822 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015841539545608478, + "loss": 0.9951, + "step": 823 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015831963972062733, + "loss": 0.9661, + "step": 824 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015822380288045792, + "loss": 1.0111, + "step": 825 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001581278850688561, + "loss": 1.0436, + "step": 826 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015803188641921417, + "loss": 1.0916, + "step": 827 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001579358070650367, + "loss": 1.0347, + "step": 828 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001578396471399406, + "loss": 0.9978, + "step": 829 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001577434067776548, + "loss": 1.0036, + "step": 830 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015764708611202015, + "loss": 1.0387, + "step": 831 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015755068527698902, + "loss": 1.0172, + "step": 832 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015745420440662543, + "loss": 0.9723, + "step": 833 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001573576436351046, + "loss": 0.9662, + "step": 834 + }, + { + "epoch": 1.23, + "learning_rate": 0.000157261003096713, + "loss": 0.9849, + "step": 835 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015716428292584787, + "loss": 1.0198, + "step": 836 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015706748325701732, + "loss": 0.9015, + "step": 837 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001569706042248399, + "loss": 1.001, + "step": 838 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001568736459640447, + "loss": 0.9681, + "step": 839 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015677660860947078, + "loss": 1.0513, + "step": 840 + }, + { + "epoch": 1.23, + "eval_loss": 1.008791446685791, + "eval_runtime": 2.6133, + "eval_samples_per_second": 417.867, + "eval_steps_per_second": 26.404, + "step": 840 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001566794922960674, + "loss": 0.9829, + "step": 841 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015658229715889347, + "loss": 1.0362, + "step": 842 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015648502333311757, + "loss": 0.9736, + "step": 843 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001563876709540178, + "loss": 1.0457, + "step": 844 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015629024015698136, + "loss": 0.9786, + "step": 845 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015619273107750462, + "loss": 1.0595, + "step": 846 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001560951438511927, + "loss": 1.0307, + "step": 847 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015599747861375955, + "loss": 1.0386, + "step": 848 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015589973550102747, + "loss": 0.9916, + "step": 849 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015580191464892716, + "loss": 0.9652, + "step": 850 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015570401619349736, + "loss": 0.9691, + "step": 851 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015560604027088477, + "loss": 1.0006, + "step": 852 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015550798701734385, + "loss": 1.0271, + "step": 853 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015540985656923645, + "loss": 1.0591, + "step": 854 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015531164906303207, + "loss": 0.967, + "step": 855 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015521336463530705, + "loss": 1.0466, + "step": 856 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001551150034227449, + "loss": 0.9953, + "step": 857 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001550165655621359, + "loss": 0.9899, + "step": 858 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015491805119037684, + "loss": 0.9742, + "step": 859 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015481946044447099, + "loss": 0.9865, + "step": 860 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001547207934615278, + "loss": 0.9384, + "step": 861 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015462205037876275, + "loss": 1.0216, + "step": 862 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015452323133349714, + "loss": 0.9467, + "step": 863 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001544243364631579, + "loss": 1.0038, + "step": 864 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001543253659052775, + "loss": 0.978, + "step": 865 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015422631979749354, + "loss": 1.0434, + "step": 866 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015412719827754873, + "loss": 1.0091, + "step": 867 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015402800148329071, + "loss": 0.9598, + "step": 868 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015392872955267175, + "loss": 0.9876, + "step": 869 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015382938262374865, + "loss": 0.9559, + "step": 870 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001537299608346824, + "loss": 0.9984, + "step": 871 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015363046432373824, + "loss": 1.0171, + "step": 872 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001535308932292853, + "loss": 1.0188, + "step": 873 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015343124768979637, + "loss": 0.9613, + "step": 874 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015333152784384777, + "loss": 0.9572, + "step": 875 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001532317338301192, + "loss": 1.0093, + "step": 876 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015313186578739353, + "loss": 0.9935, + "step": 877 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001530319238545565, + "loss": 1.0371, + "step": 878 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015293190817059667, + "loss": 1.0022, + "step": 879 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015283181887460517, + "loss": 1.0033, + "step": 880 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015273165610577542, + "loss": 0.9986, + "step": 881 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015263142000340312, + "loss": 1.0495, + "step": 882 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001525311107068859, + "loss": 1.017, + "step": 883 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015243072835572318, + "loss": 0.9757, + "step": 884 + }, + { + "epoch": 1.3, + "learning_rate": 0.000152330273089516, + "loss": 1.0342, + "step": 885 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001522297450479668, + "loss": 1.0059, + "step": 886 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015212914437087922, + "loss": 0.9845, + "step": 887 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001520284711981579, + "loss": 1.0365, + "step": 888 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001519277256698083, + "loss": 0.9521, + "step": 889 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001518269079259366, + "loss": 1.0867, + "step": 890 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015172601810674915, + "loss": 1.0444, + "step": 891 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015162505635255287, + "loss": 1.077, + "step": 892 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015152402280375454, + "loss": 0.9883, + "step": 893 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001514229176008607, + "loss": 0.9819, + "step": 894 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015132174088447776, + "loss": 0.9912, + "step": 895 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015122049279531143, + "loss": 0.9575, + "step": 896 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015111917347416671, + "loss": 1.0356, + "step": 897 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015101778306194765, + "loss": 0.9963, + "step": 898 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001509163216996572, + "loss": 0.9728, + "step": 899 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015081478952839693, + "loss": 1.0402, + "step": 900 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015071318668936695, + "loss": 1.0287, + "step": 901 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015061151332386566, + "loss": 1.0505, + "step": 902 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015050976957328938, + "loss": 0.9814, + "step": 903 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015040795557913245, + "loss": 1.0083, + "step": 904 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015030607148298696, + "loss": 1.0871, + "step": 905 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015020411742654237, + "loss": 1.0943, + "step": 906 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001501020935515854, + "loss": 1.0631, + "step": 907 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015000000000000001, + "loss": 1.0615, + "step": 908 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014989783691376696, + "loss": 0.8933, + "step": 909 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001497956044349637, + "loss": 1.012, + "step": 910 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014969330270576427, + "loss": 0.9215, + "step": 911 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014959093186843895, + "loss": 0.9894, + "step": 912 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014948849206535412, + "loss": 1.0053, + "step": 913 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014938598343897214, + "loss": 1.0975, + "step": 914 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014928340613185097, + "loss": 1.068, + "step": 915 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001491807602866442, + "loss": 0.9838, + "step": 916 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014907804604610063, + "loss": 1.1493, + "step": 917 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014897526355306428, + "loss": 0.9491, + "step": 918 + }, + { + "epoch": 1.35, + "learning_rate": 0.000148872412950474, + "loss": 1.0252, + "step": 919 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014876949438136347, + "loss": 0.9555, + "step": 920 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014866650798886074, + "loss": 0.9831, + "step": 921 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001485634539161883, + "loss": 1.0957, + "step": 922 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001484603323066627, + "loss": 0.9606, + "step": 923 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014835714330369446, + "loss": 1.0643, + "step": 924 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014825388705078777, + "loss": 1.0219, + "step": 925 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014815056369154038, + "loss": 1.1315, + "step": 926 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001480471733696434, + "loss": 1.0406, + "step": 927 + }, + { + "epoch": 1.36, + "learning_rate": 0.000147943716228881, + "loss": 1.0284, + "step": 928 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014784019241313026, + "loss": 1.035, + "step": 929 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014773660206636105, + "loss": 1.0562, + "step": 930 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001476329453326357, + "loss": 0.9813, + "step": 931 + }, + { + "epoch": 1.37, + "learning_rate": 0.000147529222356109, + "loss": 1.0865, + "step": 932 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001474254332810277, + "loss": 1.0074, + "step": 933 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014732157825173044, + "loss": 1.0855, + "step": 934 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014721765741264786, + "loss": 0.9785, + "step": 935 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001471136709083018, + "loss": 1.011, + "step": 936 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014700961888330563, + "loss": 1.0484, + "step": 937 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001469055014823637, + "loss": 1.0435, + "step": 938 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014680131885027141, + "loss": 1.0176, + "step": 939 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014669707113191483, + "loss": 0.9542, + "step": 940 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014659275847227042, + "loss": 0.9526, + "step": 941 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014648838101640518, + "loss": 0.9681, + "step": 942 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014638393890947603, + "loss": 0.9072, + "step": 943 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001462794322967299, + "loss": 0.9939, + "step": 944 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014617486132350343, + "loss": 1.018, + "step": 945 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001460702261352226, + "loss": 0.8993, + "step": 946 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014596552687740302, + "loss": 1.0134, + "step": 947 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014586076369564908, + "loss": 0.947, + "step": 948 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014575593673565426, + "loss": 1.0697, + "step": 949 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014565104614320065, + "loss": 1.006, + "step": 950 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014554609206415885, + "loss": 1.0262, + "step": 951 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014544107464448775, + "loss": 0.9809, + "step": 952 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001453359940302344, + "loss": 0.981, + "step": 953 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014523085036753354, + "loss": 0.9925, + "step": 954 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014512564380260787, + "loss": 1.0199, + "step": 955 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014502037448176734, + "loss": 0.9715, + "step": 956 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014491504255140927, + "loss": 1.0072, + "step": 957 + }, + { + "epoch": 1.41, + "learning_rate": 0.000144809648158018, + "loss": 1.0659, + "step": 958 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014470419144816483, + "loss": 1.0538, + "step": 959 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001445986725685076, + "loss": 1.0571, + "step": 960 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014449309166579072, + "loss": 0.9701, + "step": 961 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014438744888684482, + "loss": 0.9618, + "step": 962 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001442817443785865, + "loss": 0.9179, + "step": 963 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014417597828801832, + "loss": 1.0613, + "step": 964 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014407015076222846, + "loss": 0.9558, + "step": 965 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014396426194839042, + "loss": 0.9823, + "step": 966 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014385831199376317, + "loss": 0.9968, + "step": 967 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014375230104569044, + "loss": 0.9829, + "step": 968 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014364622925160098, + "loss": 1.0552, + "step": 969 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014354009675900803, + "loss": 0.993, + "step": 970 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014343390371550935, + "loss": 1.0927, + "step": 971 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014332765026878687, + "loss": 1.0387, + "step": 972 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014322133656660647, + "loss": 0.9255, + "step": 973 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014311496275681783, + "loss": 1.0093, + "step": 974 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014300852898735435, + "loss": 1.0078, + "step": 975 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014290203540623267, + "loss": 0.9161, + "step": 976 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014279548216155266, + "loss": 1.03, + "step": 977 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014268886940149714, + "loss": 1.0364, + "step": 978 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001425821972743318, + "loss": 0.9768, + "step": 979 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001424754659284048, + "loss": 1.1229, + "step": 980 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001423686755121466, + "loss": 1.0362, + "step": 981 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014226182617406996, + "loss": 0.9522, + "step": 982 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014215491806276944, + "loss": 1.0479, + "step": 983 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014204795132692144, + "loss": 1.0671, + "step": 984 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014194092611528384, + "loss": 0.8983, + "step": 985 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014183384257669581, + "loss": 1.004, + "step": 986 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014172670086007774, + "loss": 1.0972, + "step": 987 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014161950111443077, + "loss": 1.0198, + "step": 988 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014151224348883692, + "loss": 1.0257, + "step": 989 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014140492813245856, + "loss": 0.9717, + "step": 990 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001412975551945384, + "loss": 0.9455, + "step": 991 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001411901248243993, + "loss": 1.0372, + "step": 992 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001410826371714438, + "loss": 0.9961, + "step": 993 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014097509238515432, + "loss": 1.0599, + "step": 994 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014086749061509258, + "loss": 1.0166, + "step": 995 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014075983201089964, + "loss": 1.0254, + "step": 996 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014065211672229555, + "loss": 0.9979, + "step": 997 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014054434489907915, + "loss": 1.0365, + "step": 998 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014043651669112808, + "loss": 1.0075, + "step": 999 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014032863224839814, + "loss": 0.9743, + "step": 1000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014022069172092352, + "loss": 1.0056, + "step": 1001 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014011269525881636, + "loss": 0.9647, + "step": 1002 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014000464301226656, + "loss": 1.0912, + "step": 1003 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013989653513154165, + "loss": 0.8811, + "step": 1004 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013978837176698646, + "loss": 1.0667, + "step": 1005 + }, + { + "epoch": 1.48, + "learning_rate": 0.000139680153069023, + "loss": 1.0096, + "step": 1006 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013957187918815032, + "loss": 0.926, + "step": 1007 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001394635502749441, + "loss": 1.0814, + "step": 1008 + }, + { + "epoch": 1.48, + "eval_loss": 1.0040607452392578, + "eval_runtime": 2.6168, + "eval_samples_per_second": 417.304, + "eval_steps_per_second": 26.368, + "step": 1008 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001393551664800566, + "loss": 1.0941, + "step": 1009 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013924672795421637, + "loss": 1.044, + "step": 1010 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013913823484822815, + "loss": 1.049, + "step": 1011 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013902968731297255, + "loss": 0.9891, + "step": 1012 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013892108549940583, + "loss": 0.9663, + "step": 1013 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013881242955855974, + "loss": 1.0298, + "step": 1014 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001387037196415414, + "loss": 1.0083, + "step": 1015 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001385949558995329, + "loss": 0.9182, + "step": 1016 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013848613848379114, + "loss": 1.013, + "step": 1017 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013837726754564785, + "loss": 1.0022, + "step": 1018 + }, + { + "epoch": 1.5, + "learning_rate": 0.000138268343236509, + "loss": 0.9423, + "step": 1019 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013815936570785487, + "loss": 1.058, + "step": 1020 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013805033511123975, + "loss": 0.931, + "step": 1021 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013794125159829172, + "loss": 1.0137, + "step": 1022 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013783211532071246, + "loss": 1.0517, + "step": 1023 + }, + { + "epoch": 1.51, + "learning_rate": 0.000137722926430277, + "loss": 1.0259, + "step": 1024 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013761368507883359, + "loss": 1.0263, + "step": 1025 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013750439141830339, + "loss": 1.0286, + "step": 1026 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013739504560068033, + "loss": 0.9749, + "step": 1027 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013728564777803088, + "loss": 0.9317, + "step": 1028 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013717619810249378, + "loss": 1.0653, + "step": 1029 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013706669672627997, + "loss": 0.9623, + "step": 1030 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013695714380167223, + "loss": 0.9911, + "step": 1031 + }, + { + "epoch": 1.52, + "learning_rate": 0.000136847539481025, + "loss": 0.9843, + "step": 1032 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001367378839167643, + "loss": 0.981, + "step": 1033 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013662817726138728, + "loss": 1.0651, + "step": 1034 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013651841966746232, + "loss": 1.0602, + "step": 1035 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001364086112876284, + "loss": 0.9524, + "step": 1036 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013629875227459532, + "loss": 1.0264, + "step": 1037 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013618884278114324, + "loss": 1.0691, + "step": 1038 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013607888296012259, + "loss": 1.0527, + "step": 1039 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001359688729644536, + "loss": 0.9629, + "step": 1040 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001358588129471264, + "loss": 0.957, + "step": 1041 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013574870306120077, + "loss": 1.0976, + "step": 1042 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013563854345980569, + "loss": 0.9317, + "step": 1043 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013552833429613938, + "loss": 1.0359, + "step": 1044 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001354180757234689, + "loss": 1.0642, + "step": 1045 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001353077678951301, + "loss": 1.0526, + "step": 1046 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013519741096452726, + "loss": 1.0276, + "step": 1047 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013508700508513307, + "loss": 1.0471, + "step": 1048 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001349765504104881, + "loss": 1.0353, + "step": 1049 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013486604709420102, + "loss": 1.0025, + "step": 1050 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013475549528994786, + "loss": 0.9019, + "step": 1051 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013464489515147238, + "loss": 1.0453, + "step": 1052 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013453424683258528, + "loss": 1.0395, + "step": 1053 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001344235504871645, + "loss": 0.8939, + "step": 1054 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013431280626915467, + "loss": 0.9198, + "step": 1055 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013420201433256689, + "loss": 1.0046, + "step": 1056 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001340911748314788, + "loss": 0.9197, + "step": 1057 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013398028792003413, + "loss": 0.9547, + "step": 1058 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013386935375244246, + "loss": 0.968, + "step": 1059 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013375837248297926, + "loss": 0.9611, + "step": 1060 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013364734426598527, + "loss": 1.0125, + "step": 1061 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013353626925586672, + "loss": 1.0179, + "step": 1062 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013342514760709485, + "loss": 1.04, + "step": 1063 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013331397947420576, + "loss": 0.9251, + "step": 1064 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013320276501180015, + "loss": 1.0762, + "step": 1065 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013309150437454322, + "loss": 1.0137, + "step": 1066 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013298019771716435, + "loss": 0.9981, + "step": 1067 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001328688451944569, + "loss": 1.003, + "step": 1068 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013275744696127805, + "loss": 1.0307, + "step": 1069 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013264600317254853, + "loss": 1.0257, + "step": 1070 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013253451398325249, + "loss": 1.0426, + "step": 1071 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013242297954843711, + "loss": 1.0167, + "step": 1072 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013231140002321253, + "loss": 1.012, + "step": 1073 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013219977556275163, + "loss": 1.0649, + "step": 1074 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013208810632228977, + "loss": 0.9297, + "step": 1075 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013197639245712454, + "loss": 0.9772, + "step": 1076 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013186463412261565, + "loss": 1.0194, + "step": 1077 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013175283147418465, + "loss": 1.0596, + "step": 1078 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013164098466731468, + "loss": 0.9938, + "step": 1079 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013152909385755025, + "loss": 0.9405, + "step": 1080 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001314171592004972, + "loss": 1.0175, + "step": 1081 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013130518085182225, + "loss": 0.9994, + "step": 1082 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013119315896725287, + "loss": 0.9524, + "step": 1083 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013108109370257712, + "loss": 0.9112, + "step": 1084 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013096898521364338, + "loss": 0.9339, + "step": 1085 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013085683365636014, + "loss": 0.9718, + "step": 1086 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001307446391866958, + "loss": 0.969, + "step": 1087 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013063240196067836, + "loss": 1.0255, + "step": 1088 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013052012213439536, + "loss": 1.0119, + "step": 1089 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013040779986399362, + "loss": 1.0396, + "step": 1090 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013029543530567884, + "loss": 1.0202, + "step": 1091 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001301830286157157, + "loss": 0.9024, + "step": 1092 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013007057995042732, + "loss": 1.0079, + "step": 1093 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001299580894661953, + "loss": 0.9771, + "step": 1094 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001298455573194594, + "loss": 0.9942, + "step": 1095 + }, + { + "epoch": 1.61, + "learning_rate": 0.00012973298366671725, + "loss": 0.9879, + "step": 1096 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012962036866452422, + "loss": 0.9365, + "step": 1097 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001295077124694932, + "loss": 1.0128, + "step": 1098 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012939501523829444, + "loss": 1.0707, + "step": 1099 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012928227712765504, + "loss": 0.9769, + "step": 1100 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012916949829435922, + "loss": 1.0208, + "step": 1101 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001290566788952477, + "loss": 1.0376, + "step": 1102 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012894381908721756, + "loss": 1.0588, + "step": 1103 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001288309190272222, + "loss": 1.0217, + "step": 1104 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012871797887227087, + "loss": 0.9684, + "step": 1105 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012860499877942875, + "loss": 0.9753, + "step": 1106 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012849197890581638, + "loss": 1.0094, + "step": 1107 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012837891940860972, + "loss": 1.0346, + "step": 1108 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012826582044503978, + "loss": 0.8741, + "step": 1109 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012815268217239252, + "loss": 1.0223, + "step": 1110 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012803950474800862, + "loss": 0.8748, + "step": 1111 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012792628832928302, + "loss": 1.0296, + "step": 1112 + }, + { + "epoch": 1.64, + "learning_rate": 0.000127813033073665, + "loss": 0.9993, + "step": 1113 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012769973913865794, + "loss": 1.0555, + "step": 1114 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012758640668181882, + "loss": 1.0245, + "step": 1115 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001274730358607583, + "loss": 0.9502, + "step": 1116 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012735962683314042, + "loss": 1.0165, + "step": 1117 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001272461797566823, + "loss": 1.0669, + "step": 1118 + }, + { + "epoch": 1.65, + "learning_rate": 0.000127132694789154, + "loss": 0.8676, + "step": 1119 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001270191720883782, + "loss": 0.9432, + "step": 1120 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012690561181223024, + "loss": 1.0614, + "step": 1121 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001267920141186375, + "loss": 0.9924, + "step": 1122 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012667837916557954, + "loss": 1.139, + "step": 1123 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012656470711108764, + "loss": 1.0043, + "step": 1124 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012645099811324476, + "loss": 1.0747, + "step": 1125 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001263372523301852, + "loss": 0.9668, + "step": 1126 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012622346992009447, + "loss": 0.9931, + "step": 1127 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012610965104120885, + "loss": 0.9393, + "step": 1128 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012599579585181552, + "loss": 0.9918, + "step": 1129 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012588190451025207, + "loss": 1.0172, + "step": 1130 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012576797717490644, + "loss": 1.0586, + "step": 1131 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012565401400421651, + "loss": 1.0482, + "step": 1132 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012554001515667008, + "loss": 1.0548, + "step": 1133 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012542598079080456, + "loss": 1.0092, + "step": 1134 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012531191106520672, + "loss": 1.0162, + "step": 1135 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012519780613851254, + "loss": 1.0387, + "step": 1136 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001250836661694069, + "loss": 0.9607, + "step": 1137 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012496949131662348, + "loss": 1.0025, + "step": 1138 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012485528173894448, + "loss": 1.0014, + "step": 1139 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012474103759520027, + "loss": 0.9838, + "step": 1140 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001246267590442694, + "loss": 1.0384, + "step": 1141 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012451244624507831, + "loss": 0.9958, + "step": 1142 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012439809935660095, + "loss": 0.9927, + "step": 1143 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001242837185378587, + "loss": 1.0082, + "step": 1144 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012416930394792026, + "loss": 0.9729, + "step": 1145 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012405485574590113, + "loss": 1.0464, + "step": 1146 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012394037409096357, + "loss": 0.987, + "step": 1147 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001238258591423165, + "loss": 0.9402, + "step": 1148 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012371131105921504, + "loss": 1.0293, + "step": 1149 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012359673000096033, + "loss": 0.9418, + "step": 1150 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001234821161268995, + "loss": 0.964, + "step": 1151 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012336746959642526, + "loss": 0.9982, + "step": 1152 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001232527905689757, + "loss": 0.9364, + "step": 1153 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012313807920403419, + "loss": 0.9399, + "step": 1154 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001230233356611289, + "loss": 1.015, + "step": 1155 + }, + { + "epoch": 1.7, + "learning_rate": 0.000122908560099833, + "loss": 1.0214, + "step": 1156 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012279375267976398, + "loss": 1.0262, + "step": 1157 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012267891356058377, + "loss": 1.0277, + "step": 1158 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012256404290199825, + "loss": 1.0095, + "step": 1159 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012244914086375724, + "loss": 1.0314, + "step": 1160 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012233420760565428, + "loss": 0.8282, + "step": 1161 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012221924328752616, + "loss": 0.9709, + "step": 1162 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012210424806925301, + "loss": 0.941, + "step": 1163 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012198922211075778, + "loss": 0.9716, + "step": 1164 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012187416557200633, + "loss": 1.0125, + "step": 1165 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012175907861300697, + "loss": 1.0159, + "step": 1166 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012164396139381029, + "loss": 0.9306, + "step": 1167 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012152881407450905, + "loss": 1.1056, + "step": 1168 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012141363681523776, + "loss": 1.0113, + "step": 1169 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012129842977617265, + "loss": 0.9983, + "step": 1170 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012118319311753137, + "loss": 1.0076, + "step": 1171 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012106792699957263, + "loss": 1.1181, + "step": 1172 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012095263158259631, + "loss": 0.8759, + "step": 1173 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012083730702694291, + "loss": 0.9855, + "step": 1174 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012072195349299345, + "loss": 1.1361, + "step": 1175 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012060657114116926, + "loss": 1.0275, + "step": 1176 + }, + { + "epoch": 1.73, + "eval_loss": 0.9928944110870361, + "eval_runtime": 2.6469, + "eval_samples_per_second": 412.56, + "eval_steps_per_second": 26.068, + "step": 1176 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001204911601319318, + "loss": 1.0256, + "step": 1177 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012037572062578238, + "loss": 0.9218, + "step": 1178 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012026025278326187, + "loss": 1.0394, + "step": 1179 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012014475676495052, + "loss": 1.0318, + "step": 1180 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012002923273146794, + "loss": 1.0361, + "step": 1181 + }, + { + "epoch": 1.74, + "learning_rate": 0.00011991368084347252, + "loss": 1.0093, + "step": 1182 + }, + { + "epoch": 1.74, + "learning_rate": 0.00011979810126166151, + "loss": 0.9527, + "step": 1183 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011968249414677055, + "loss": 1.0946, + "step": 1184 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011956685965957368, + "loss": 1.0124, + "step": 1185 + }, + { + "epoch": 1.75, + "learning_rate": 0.000119451197960883, + "loss": 1.0074, + "step": 1186 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011933550921154834, + "loss": 1.0315, + "step": 1187 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001192197935724573, + "loss": 0.9915, + "step": 1188 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011910405120453476, + "loss": 0.9823, + "step": 1189 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011898828226874284, + "loss": 1.0294, + "step": 1190 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011887248692608057, + "loss": 1.0176, + "step": 1191 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011875666533758372, + "loss": 1.0486, + "step": 1192 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011864081766432456, + "loss": 1.0237, + "step": 1193 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011852494406741165, + "loss": 1.0469, + "step": 1194 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011840904470798955, + "loss": 0.9545, + "step": 1195 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011829311974723867, + "loss": 0.9812, + "step": 1196 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011817716934637509, + "loss": 1.0503, + "step": 1197 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001180611936666502, + "loss": 1.0693, + "step": 1198 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011794519286935055, + "loss": 0.9627, + "step": 1199 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011782916711579759, + "loss": 0.9728, + "step": 1200 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001177131165673476, + "loss": 1.13, + "step": 1201 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001175970413853912, + "loss": 0.9756, + "step": 1202 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011748094173135337, + "loss": 1.0069, + "step": 1203 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011736481776669306, + "loss": 1.033, + "step": 1204 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011724866965290302, + "loss": 0.9906, + "step": 1205 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011713249755150965, + "loss": 1.1008, + "step": 1206 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011701630162407266, + "loss": 0.9987, + "step": 1207 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011690008203218493, + "loss": 1.0122, + "step": 1208 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001167838389374722, + "loss": 1.0495, + "step": 1209 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001166675725015929, + "loss": 0.9875, + "step": 1210 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011655128288623802, + "loss": 1.0231, + "step": 1211 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011643497025313061, + "loss": 0.9342, + "step": 1212 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011631863476402594, + "loss": 1.1006, + "step": 1213 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011620227658071087, + "loss": 0.9264, + "step": 1214 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011608589586500391, + "loss": 1.1099, + "step": 1215 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011596949277875495, + "loss": 1.0326, + "step": 1216 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001158530674838449, + "loss": 0.9235, + "step": 1217 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011573662014218564, + "loss": 1.0227, + "step": 1218 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011562015091571963, + "loss": 1.0028, + "step": 1219 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011550365996641979, + "loss": 1.0744, + "step": 1220 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011538714745628931, + "loss": 0.9521, + "step": 1221 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011527061354736129, + "loss": 1.0171, + "step": 1222 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011515405840169861, + "loss": 1.0481, + "step": 1223 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011503748218139369, + "loss": 1.0034, + "step": 1224 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011492088504856826, + "loss": 1.1384, + "step": 1225 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011480426716537315, + "loss": 1.0268, + "step": 1226 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011468762869398802, + "loss": 1.003, + "step": 1227 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011457096979662114, + "loss": 1.1087, + "step": 1228 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011445429063550926, + "loss": 1.0809, + "step": 1229 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011433759137291727, + "loss": 1.0054, + "step": 1230 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011422087217113795, + "loss": 0.9416, + "step": 1231 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011410413319249194, + "loss": 1.0153, + "step": 1232 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011398737459932728, + "loss": 1.0622, + "step": 1233 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011387059655401932, + "loss": 1.0792, + "step": 1234 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011375379921897051, + "loss": 0.9822, + "step": 1235 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011363698275661001, + "loss": 0.9949, + "step": 1236 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011352014732939369, + "loss": 0.9653, + "step": 1237 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011340329309980377, + "loss": 1.0694, + "step": 1238 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011328642023034857, + "loss": 0.9925, + "step": 1239 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011316952888356237, + "loss": 0.9829, + "step": 1240 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011305261922200519, + "loss": 0.9659, + "step": 1241 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011293569140826239, + "loss": 1.109, + "step": 1242 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011281874560494472, + "loss": 1.0614, + "step": 1243 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011270178197468789, + "loss": 0.9013, + "step": 1244 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011258480068015235, + "loss": 1.0049, + "step": 1245 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011246780188402322, + "loss": 0.9746, + "step": 1246 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011235078574900984, + "loss": 1.1433, + "step": 1247 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011223375243784573, + "loss": 1.0196, + "step": 1248 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011211670211328833, + "loss": 0.9859, + "step": 1249 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001119996349381187, + "loss": 0.9037, + "step": 1250 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001118825510751413, + "loss": 1.0481, + "step": 1251 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011176545068718385, + "loss": 1.0324, + "step": 1252 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011164833393709706, + "loss": 1.0155, + "step": 1253 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011153120098775434, + "loss": 0.967, + "step": 1254 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011141405200205166, + "loss": 0.9766, + "step": 1255 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011129688714290729, + "loss": 1.0075, + "step": 1256 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011117970657326158, + "loss": 0.9472, + "step": 1257 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011106251045607674, + "loss": 0.9949, + "step": 1258 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011094529895433652, + "loss": 1.0302, + "step": 1259 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001108280722310462, + "loss": 1.0538, + "step": 1260 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011071083044923214, + "loss": 1.0025, + "step": 1261 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011059357377194161, + "loss": 1.024, + "step": 1262 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011047630236224271, + "loss": 0.9452, + "step": 1263 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011035901638322392, + "loss": 1.0055, + "step": 1264 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011024171599799409, + "loss": 0.9875, + "step": 1265 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011012440136968196, + "loss": 0.9582, + "step": 1266 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011000707266143617, + "loss": 0.9986, + "step": 1267 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010988973003642499, + "loss": 1.0328, + "step": 1268 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001097723736578359, + "loss": 1.0108, + "step": 1269 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010965500368887567, + "loss": 0.9941, + "step": 1270 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010953762029276982, + "loss": 1.0842, + "step": 1271 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010942022363276264, + "loss": 0.994, + "step": 1272 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010930281387211683, + "loss": 1.0151, + "step": 1273 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010918539117411333, + "loss": 1.0172, + "step": 1274 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010906795570205104, + "loss": 1.0698, + "step": 1275 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010895050761924668, + "loss": 0.9835, + "step": 1276 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001088330470890344, + "loss": 0.9461, + "step": 1277 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010871557427476583, + "loss": 1.0394, + "step": 1278 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010859808933980948, + "loss": 0.9639, + "step": 1279 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010848059244755093, + "loss": 0.9863, + "step": 1280 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010836308376139221, + "loss": 1.0728, + "step": 1281 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010824556344475181, + "loss": 0.9989, + "step": 1282 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010812803166106444, + "loss": 0.962, + "step": 1283 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010801048857378071, + "loss": 0.8658, + "step": 1284 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010789293434636698, + "loss": 1.0488, + "step": 1285 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010777536914230508, + "loss": 1.0183, + "step": 1286 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010765779312509208, + "loss": 0.9535, + "step": 1287 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010754020645824017, + "loss": 0.9978, + "step": 1288 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010742260930527625, + "loss": 0.8929, + "step": 1289 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001073050018297419, + "loss": 0.9762, + "step": 1290 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010718738419519297, + "loss": 1.0559, + "step": 1291 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010706975656519946, + "loss": 1.0327, + "step": 1292 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010695211910334537, + "loss": 1.0322, + "step": 1293 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010683447197322817, + "loss": 1.0542, + "step": 1294 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010671681533845899, + "loss": 1.0521, + "step": 1295 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010659914936266206, + "loss": 0.9967, + "step": 1296 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010648147420947461, + "loss": 1.0491, + "step": 1297 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010636379004254664, + "loss": 0.9035, + "step": 1298 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010624609702554069, + "loss": 1.0704, + "step": 1299 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010612839532213164, + "loss": 0.9533, + "step": 1300 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010601068509600642, + "loss": 1.0396, + "step": 1301 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010589296651086376, + "loss": 0.9543, + "step": 1302 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001057752397304141, + "loss": 1.0591, + "step": 1303 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010565750491837925, + "loss": 1.1191, + "step": 1304 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010553976223849218, + "loss": 0.916, + "step": 1305 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010542201185449678, + "loss": 0.9732, + "step": 1306 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010530425393014774, + "loss": 1.01, + "step": 1307 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010518648862921012, + "loss": 0.9849, + "step": 1308 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001050687161154593, + "loss": 1.0519, + "step": 1309 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010495093655268071, + "loss": 1.0539, + "step": 1310 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010483315010466952, + "loss": 0.9922, + "step": 1311 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010471535693523057, + "loss": 1.0048, + "step": 1312 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010459755720817797, + "loss": 1.0576, + "step": 1313 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010447975108733492, + "loss": 1.0268, + "step": 1314 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010436193873653361, + "loss": 1.0566, + "step": 1315 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010424412031961484, + "loss": 1.0294, + "step": 1316 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010412629600042785, + "loss": 1.0808, + "step": 1317 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010400846594283012, + "loss": 1.0487, + "step": 1318 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010389063031068698, + "loss": 1.04, + "step": 1319 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010377278926787173, + "loss": 1.033, + "step": 1320 + }, + { + "epoch": 1.95, + "learning_rate": 0.000103654942978265, + "loss": 0.9637, + "step": 1321 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010353709160575489, + "loss": 0.9665, + "step": 1322 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010341923531423634, + "loss": 1.0079, + "step": 1323 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010330137426761135, + "loss": 0.9989, + "step": 1324 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010318350862978848, + "loss": 1.0103, + "step": 1325 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010306563856468253, + "loss": 0.9872, + "step": 1326 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010294776423621464, + "loss": 0.9684, + "step": 1327 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010282988580831183, + "loss": 0.9745, + "step": 1328 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010271200344490674, + "loss": 1.0621, + "step": 1329 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001025941173099376, + "loss": 1.0639, + "step": 1330 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010247622756734774, + "loss": 0.914, + "step": 1331 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010235833438108571, + "loss": 1.0135, + "step": 1332 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010224043791510465, + "loss": 1.0132, + "step": 1333 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010212253833336237, + "loss": 0.9912, + "step": 1334 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010200463579982098, + "loss": 0.9869, + "step": 1335 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001018867304784467, + "loss": 0.9784, + "step": 1336 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010176882253320967, + "loss": 0.9837, + "step": 1337 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001016509121280836, + "loss": 1.039, + "step": 1338 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010153299942704566, + "loss": 0.9984, + "step": 1339 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010141508459407623, + "loss": 1.0526, + "step": 1340 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010129716779315862, + "loss": 1.0581, + "step": 1341 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001011792491882789, + "loss": 1.0607, + "step": 1342 + } + ], + "logging_steps": 1, + "max_steps": 2684, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 671, + "total_flos": 4.1689736495602074e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}