diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6025 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0761421319796955, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 0.0005, + "loss": 0.799, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 0.001, + "loss": 0.6675, + "step": 2 + }, + { + "epoch": 0.02, + "learning_rate": 0.000999997522701653, + "loss": 0.999, + "step": 3 + }, + { + "epoch": 0.02, + "learning_rate": 0.0009999900908311602, + "loss": 0.7497, + "step": 4 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999777044621652, + "loss": 0.5151, + "step": 5 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999603637174071, + "loss": 0.5689, + "step": 6 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999380687687187, + "loss": 1.0888, + "step": 7 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999108198370248, + "loss": 0.7286, + "step": 8 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009998786171923407, + "loss": 0.7832, + "step": 9 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999841461153768, + "loss": 0.5225, + "step": 10 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009997993520894936, + "loss": 0.6154, + "step": 11 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009997522904167844, + "loss": 0.822, + "step": 12 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009997002766019831, + "loss": 0.5985, + "step": 13 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009996433111605053, + "loss": 0.6488, + "step": 14 + }, + { + "epoch": 0.08, + "learning_rate": 0.000999581394656832, + "loss": 0.5519, + "step": 15 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009995145277045061, + "loss": 0.8489, + "step": 16 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994427109661253, + "loss": 0.622, + "step": 17 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009993659451533353, + "loss": 0.7171, + "step": 18 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009992842310268233, + "loss": 0.6803, + "step": 19 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009991975693963108, + "loss": 0.7087, + "step": 20 + }, + { + "epoch": 0.11, + "learning_rate": 0.000999105961120544, + "loss": 0.7689, + "step": 21 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009990094071072877, + "loss": 0.5209, + "step": 22 + }, + { + "epoch": 0.12, + "learning_rate": 0.000998907908313314, + "loss": 0.6299, + "step": 23 + }, + { + "epoch": 0.12, + "learning_rate": 0.000998801465744394, + "loss": 0.9556, + "step": 24 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009986900804552878, + "loss": 0.5427, + "step": 25 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009985737535497337, + "loss": 0.7997, + "step": 26 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009984524861804376, + "loss": 0.7101, + "step": 27 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009983262795490613, + "loss": 0.5568, + "step": 28 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009981951349062107, + "loss": 0.7616, + "step": 29 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009980590535514234, + "loss": 0.5599, + "step": 30 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009979180368331559, + "loss": 0.5306, + "step": 31 + }, + { + "epoch": 0.16, + "learning_rate": 0.00099777208614877, + "loss": 0.7418, + "step": 32 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009976212029445194, + "loss": 0.7747, + "step": 33 + }, + { + "epoch": 0.17, + "learning_rate": 0.000997465388715535, + "loss": 0.7886, + "step": 34 + }, + { + "epoch": 0.18, + "learning_rate": 0.00099730464500581, + "loss": 0.7207, + "step": 35 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009971389734081848, + "loss": 0.7, + "step": 36 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009969683755643318, + "loss": 0.5443, + "step": 37 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009967928531647372, + "loss": 0.6659, + "step": 38 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009966124079486872, + "loss": 0.6102, + "step": 39 + }, + { + "epoch": 0.2, + "learning_rate": 0.000996427041704248, + "loss": 0.6723, + "step": 40 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009962367562682496, + "loss": 0.5818, + "step": 41 + }, + { + "epoch": 0.21, + "learning_rate": 0.000996041553526267, + "loss": 0.6673, + "step": 42 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009958414354126022, + "loss": 0.7536, + "step": 43 + }, + { + "epoch": 0.22, + "learning_rate": 0.000995636403910264, + "loss": 0.4539, + "step": 44 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009954264610509496, + "loss": 0.7296, + "step": 45 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009952116089150232, + "loss": 0.5989, + "step": 46 + }, + { + "epoch": 0.24, + "learning_rate": 0.000994991849631496, + "loss": 0.5712, + "step": 47 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009947671853780054, + "loss": 0.872, + "step": 48 + }, + { + "epoch": 0.25, + "learning_rate": 0.000994537618380793, + "loss": 0.7327, + "step": 49 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009943031509146824, + "loss": 0.7512, + "step": 50 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009940637853030573, + "loss": 0.6572, + "step": 51 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009938195239178375, + "loss": 0.6821, + "step": 52 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009935703691794564, + "loss": 0.6785, + "step": 53 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009933163235568369, + "loss": 0.9043, + "step": 54 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009930573895673657, + "loss": 0.6856, + "step": 55 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009927935697768698, + "loss": 0.4684, + "step": 56 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009925248667995907, + "loss": 0.5579, + "step": 57 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009922512832981584, + "loss": 0.4323, + "step": 58 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009919728219835644, + "loss": 1.0761, + "step": 59 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009916894856151356, + "loss": 0.5668, + "step": 60 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009914012770005072, + "loss": 0.6059, + "step": 61 + }, + { + "epoch": 0.31, + "learning_rate": 0.000991108198995594, + "loss": 0.5635, + "step": 62 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009908102545045623, + "loss": 0.7705, + "step": 63 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009905074464798022, + "loss": 0.8812, + "step": 64 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009901997779218968, + "loss": 0.6145, + "step": 65 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009898872518795932, + "loss": 0.5681, + "step": 66 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009895698714497724, + "loss": 0.658, + "step": 67 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009892476397774185, + "loss": 0.5969, + "step": 68 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009889205600555875, + "loss": 0.5991, + "step": 69 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009885886355253757, + "loss": 0.762, + "step": 70 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009882518694758874, + "loss": 0.5969, + "step": 71 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009879102652442023, + "loss": 0.5668, + "step": 72 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009875638262153432, + "loss": 0.6104, + "step": 73 + }, + { + "epoch": 0.38, + "learning_rate": 0.000987212555822241, + "loss": 0.5512, + "step": 74 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009868564575457022, + "loss": 0.5551, + "step": 75 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009864955349143734, + "loss": 0.6269, + "step": 76 + }, + { + "epoch": 0.39, + "learning_rate": 0.000986129791504707, + "loss": 0.5629, + "step": 77 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009857592309409247, + "loss": 0.7589, + "step": 78 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009853838568949832, + "loss": 0.5747, + "step": 79 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009850036730865363, + "loss": 0.7552, + "step": 80 + }, + { + "epoch": 0.41, + "learning_rate": 0.000984618683282899, + "loss": 0.6728, + "step": 81 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009842288912990096, + "loss": 0.6057, + "step": 82 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009838343009973924, + "loss": 0.545, + "step": 83 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009834349162881188, + "loss": 0.7007, + "step": 84 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009830307411287697, + "loss": 0.5392, + "step": 85 + }, + { + "epoch": 0.44, + "learning_rate": 0.000982621779524394, + "loss": 0.8032, + "step": 86 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009822080355274718, + "loss": 0.5957, + "step": 87 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009817895132378724, + "loss": 0.5639, + "step": 88 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009813662168028144, + "loss": 0.6488, + "step": 89 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009809381504168233, + "loss": 0.8626, + "step": 90 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009805053183216924, + "loss": 0.6872, + "step": 91 + }, + { + "epoch": 0.47, + "learning_rate": 0.0009800677248064382, + "loss": 0.6908, + "step": 92 + }, + { + "epoch": 0.47, + "learning_rate": 0.0009796253742072596, + "loss": 0.699, + "step": 93 + }, + { + "epoch": 0.48, + "learning_rate": 0.0009791782709074944, + "loss": 0.6305, + "step": 94 + }, + { + "epoch": 0.48, + "learning_rate": 0.0009787264193375754, + "loss": 0.8407, + "step": 95 + }, + { + "epoch": 0.49, + "learning_rate": 0.0009782698239749871, + "loss": 0.7651, + "step": 96 + }, + { + "epoch": 0.49, + "learning_rate": 0.0009778084893442218, + "loss": 0.5649, + "step": 97 + }, + { + "epoch": 0.5, + "learning_rate": 0.000977342420016733, + "loss": 0.6298, + "step": 98 + }, + { + "epoch": 0.5, + "learning_rate": 0.0009768716206108921, + "loss": 0.6444, + "step": 99 + }, + { + "epoch": 0.51, + "learning_rate": 0.0009763960957919414, + "loss": 0.5879, + "step": 100 + }, + { + "epoch": 0.51, + "learning_rate": 0.0009759158502719481, + "loss": 0.6006, + "step": 101 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009754308888097583, + "loss": 0.6416, + "step": 102 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009749412162109486, + "loss": 0.8301, + "step": 103 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009744468373277796, + "loss": 0.8659, + "step": 104 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009739477570591473, + "loss": 0.7777, + "step": 105 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009734439803505345, + "loss": 0.5722, + "step": 106 + }, + { + "epoch": 0.54, + "learning_rate": 0.000972935512193962, + "loss": 0.6249, + "step": 107 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009724223576279394, + "loss": 1.1485, + "step": 108 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009719045217374141, + "loss": 0.6837, + "step": 109 + }, + { + "epoch": 0.56, + "learning_rate": 0.0009713820096537225, + "loss": 0.5921, + "step": 110 + }, + { + "epoch": 0.56, + "learning_rate": 0.0009708548265545375, + "loss": 0.8619, + "step": 111 + }, + { + "epoch": 0.57, + "learning_rate": 0.0009703229776638185, + "loss": 0.6175, + "step": 112 + }, + { + "epoch": 0.57, + "learning_rate": 0.0009697864682517592, + "loss": 0.6915, + "step": 113 + }, + { + "epoch": 0.58, + "learning_rate": 0.0009692453036347351, + "loss": 0.5714, + "step": 114 + }, + { + "epoch": 0.58, + "learning_rate": 0.0009686994891752507, + "loss": 0.5019, + "step": 115 + }, + { + "epoch": 0.59, + "learning_rate": 0.0009681490302818874, + "loss": 1.1504, + "step": 116 + }, + { + "epoch": 0.59, + "learning_rate": 0.0009675939324092487, + "loss": 0.6648, + "step": 117 + }, + { + "epoch": 0.6, + "learning_rate": 0.0009670342010579065, + "loss": 0.6379, + "step": 118 + }, + { + "epoch": 0.6, + "learning_rate": 0.0009664698417743475, + "loss": 0.6071, + "step": 119 + }, + { + "epoch": 0.61, + "learning_rate": 0.0009659008601509168, + "loss": 0.675, + "step": 120 + }, + { + "epoch": 0.61, + "learning_rate": 0.0009653272618257631, + "loss": 0.5167, + "step": 121 + }, + { + "epoch": 0.62, + "learning_rate": 0.0009647490524827833, + "loss": 0.7457, + "step": 122 + }, + { + "epoch": 0.62, + "learning_rate": 0.0009641662378515658, + "loss": 0.836, + "step": 123 + }, + { + "epoch": 0.63, + "learning_rate": 0.0009635788237073333, + "loss": 0.759, + "step": 124 + }, + { + "epoch": 0.63, + "learning_rate": 0.0009629868158708861, + "loss": 0.885, + "step": 125 + }, + { + "epoch": 0.64, + "learning_rate": 0.0009623902202085444, + "loss": 0.7311, + "step": 126 + }, + { + "epoch": 0.64, + "learning_rate": 0.0009617890426320899, + "loss": 0.7275, + "step": 127 + }, + { + "epoch": 0.65, + "learning_rate": 0.0009611832890987075, + "loss": 0.6473, + "step": 128 + }, + { + "epoch": 0.65, + "learning_rate": 0.0009605729656109264, + "loss": 0.6269, + "step": 129 + }, + { + "epoch": 0.66, + "learning_rate": 0.0009599580782165598, + "loss": 0.9734, + "step": 130 + }, + { + "epoch": 0.66, + "learning_rate": 0.0009593386330086457, + "loss": 0.6377, + "step": 131 + }, + { + "epoch": 0.67, + "learning_rate": 0.0009587146361253867, + "loss": 0.7208, + "step": 132 + }, + { + "epoch": 0.68, + "learning_rate": 0.0009580860937500883, + "loss": 0.5472, + "step": 133 + }, + { + "epoch": 0.68, + "learning_rate": 0.0009574530121110989, + "loss": 0.8343, + "step": 134 + }, + { + "epoch": 0.69, + "learning_rate": 0.0009568153974817464, + "loss": 0.6048, + "step": 135 + }, + { + "epoch": 0.69, + "learning_rate": 0.0009561732561802779, + "loss": 0.5346, + "step": 136 + }, + { + "epoch": 0.7, + "learning_rate": 0.0009555265945697953, + "loss": 0.6295, + "step": 137 + }, + { + "epoch": 0.7, + "learning_rate": 0.0009548754190581938, + "loss": 0.6079, + "step": 138 + }, + { + "epoch": 0.71, + "learning_rate": 0.0009542197360980978, + "loss": 0.5289, + "step": 139 + }, + { + "epoch": 0.71, + "learning_rate": 0.0009535595521867959, + "loss": 0.7579, + "step": 140 + }, + { + "epoch": 0.72, + "learning_rate": 0.0009528948738661784, + "loss": 0.8158, + "step": 141 + }, + { + "epoch": 0.72, + "learning_rate": 0.0009522257077226717, + "loss": 0.6023, + "step": 142 + }, + { + "epoch": 0.73, + "learning_rate": 0.0009515520603871719, + "loss": 0.6647, + "step": 143 + }, + { + "epoch": 0.73, + "learning_rate": 0.0009508739385349812, + "loss": 1.1108, + "step": 144 + }, + { + "epoch": 0.74, + "learning_rate": 0.0009501913488857399, + "loss": 0.9834, + "step": 145 + }, + { + "epoch": 0.74, + "learning_rate": 0.0009495042982033611, + "loss": 0.9049, + "step": 146 + }, + { + "epoch": 0.75, + "learning_rate": 0.0009488127932959625, + "loss": 0.6604, + "step": 147 + }, + { + "epoch": 0.75, + "learning_rate": 0.0009481168410158003, + "loss": 0.8768, + "step": 148 + }, + { + "epoch": 0.76, + "learning_rate": 0.0009474164482592001, + "loss": 0.6579, + "step": 149 + }, + { + "epoch": 0.76, + "learning_rate": 0.0009467116219664893, + "loss": 0.7038, + "step": 150 + }, + { + "epoch": 0.77, + "learning_rate": 0.0009460023691219276, + "loss": 0.6726, + "step": 151 + }, + { + "epoch": 0.77, + "learning_rate": 0.0009452886967536389, + "loss": 1.1224, + "step": 152 + }, + { + "epoch": 0.78, + "learning_rate": 0.0009445706119335407, + "loss": 0.7254, + "step": 153 + }, + { + "epoch": 0.78, + "learning_rate": 0.0009438481217772743, + "loss": 0.6464, + "step": 154 + }, + { + "epoch": 0.79, + "learning_rate": 0.0009431212334441342, + "loss": 1.0653, + "step": 155 + }, + { + "epoch": 0.79, + "learning_rate": 0.0009423899541369978, + "loss": 0.7072, + "step": 156 + }, + { + "epoch": 0.8, + "learning_rate": 0.000941654291102253, + "loss": 0.9562, + "step": 157 + }, + { + "epoch": 0.8, + "learning_rate": 0.0009409142516297269, + "loss": 0.5401, + "step": 158 + }, + { + "epoch": 0.81, + "learning_rate": 0.0009401698430526141, + "loss": 0.628, + "step": 159 + }, + { + "epoch": 0.81, + "learning_rate": 0.0009394210727474029, + "loss": 0.6226, + "step": 160 + }, + { + "epoch": 0.82, + "learning_rate": 0.0009386679481338031, + "loss": 0.8939, + "step": 161 + }, + { + "epoch": 0.82, + "learning_rate": 0.0009379104766746722, + "loss": 0.614, + "step": 162 + }, + { + "epoch": 0.83, + "learning_rate": 0.0009371486658759415, + "loss": 0.6249, + "step": 163 + }, + { + "epoch": 0.83, + "learning_rate": 0.0009363825232865413, + "loss": 0.5156, + "step": 164 + }, + { + "epoch": 0.84, + "learning_rate": 0.0009356120564983266, + "loss": 0.6214, + "step": 165 + }, + { + "epoch": 0.84, + "learning_rate": 0.0009348372731460022, + "loss": 0.6466, + "step": 166 + }, + { + "epoch": 0.85, + "learning_rate": 0.0009340581809070458, + "loss": 1.3654, + "step": 167 + }, + { + "epoch": 0.85, + "learning_rate": 0.0009332747875016332, + "loss": 0.7355, + "step": 168 + }, + { + "epoch": 0.86, + "learning_rate": 0.0009324871006925612, + "loss": 0.6488, + "step": 169 + }, + { + "epoch": 0.86, + "learning_rate": 0.0009316951282851706, + "loss": 0.6619, + "step": 170 + }, + { + "epoch": 0.87, + "learning_rate": 0.0009308988781272693, + "loss": 0.7481, + "step": 171 + }, + { + "epoch": 0.87, + "learning_rate": 0.0009300983581090541, + "loss": 0.7286, + "step": 172 + }, + { + "epoch": 0.88, + "learning_rate": 0.0009292935761630325, + "loss": 0.6915, + "step": 173 + }, + { + "epoch": 0.88, + "learning_rate": 0.0009284845402639446, + "loss": 0.7665, + "step": 174 + }, + { + "epoch": 0.89, + "learning_rate": 0.0009276712584286833, + "loss": 0.5691, + "step": 175 + }, + { + "epoch": 0.89, + "learning_rate": 0.000926853738716216, + "loss": 0.9606, + "step": 176 + }, + { + "epoch": 0.9, + "learning_rate": 0.0009260319892275033, + "loss": 0.6955, + "step": 177 + }, + { + "epoch": 0.9, + "learning_rate": 0.00092520601810542, + "loss": 0.7575, + "step": 178 + }, + { + "epoch": 0.91, + "learning_rate": 0.0009243758335346734, + "loss": 0.644, + "step": 179 + }, + { + "epoch": 0.91, + "learning_rate": 0.0009235414437417234, + "loss": 0.6696, + "step": 180 + }, + { + "epoch": 0.92, + "learning_rate": 0.0009227028569946996, + "loss": 0.7746, + "step": 181 + }, + { + "epoch": 0.92, + "learning_rate": 0.0009218600816033201, + "loss": 0.7244, + "step": 182 + }, + { + "epoch": 0.93, + "learning_rate": 0.0009210131259188095, + "loss": 0.6162, + "step": 183 + }, + { + "epoch": 0.93, + "learning_rate": 0.0009201619983338152, + "loss": 1.1655, + "step": 184 + }, + { + "epoch": 0.94, + "learning_rate": 0.0009193067072823251, + "loss": 0.7024, + "step": 185 + }, + { + "epoch": 0.94, + "learning_rate": 0.000918447261239584, + "loss": 0.8246, + "step": 186 + }, + { + "epoch": 0.95, + "learning_rate": 0.0009175836687220084, + "loss": 0.8583, + "step": 187 + }, + { + "epoch": 0.95, + "learning_rate": 0.0009167159382871039, + "loss": 0.6455, + "step": 188 + }, + { + "epoch": 0.96, + "learning_rate": 0.000915844078533379, + "loss": 0.7452, + "step": 189 + }, + { + "epoch": 0.96, + "learning_rate": 0.0009149680981002608, + "loss": 0.6813, + "step": 190 + }, + { + "epoch": 0.97, + "learning_rate": 0.0009140880056680088, + "loss": 0.652, + "step": 191 + }, + { + "epoch": 0.97, + "learning_rate": 0.000913203809957629, + "loss": 0.6686, + "step": 192 + }, + { + "epoch": 0.98, + "learning_rate": 0.0009123155197307875, + "loss": 0.7548, + "step": 193 + }, + { + "epoch": 0.98, + "learning_rate": 0.0009114231437897244, + "loss": 0.8523, + "step": 194 + }, + { + "epoch": 0.99, + "learning_rate": 0.0009105266909771652, + "loss": 0.5544, + "step": 195 + }, + { + "epoch": 0.99, + "learning_rate": 0.0009096261701762343, + "loss": 0.6712, + "step": 196 + }, + { + "epoch": 1.0, + "learning_rate": 0.0009087215903103663, + "loss": 0.4564, + "step": 197 + }, + { + "epoch": 1.01, + "learning_rate": 0.0009078129603432181, + "loss": 0.3499, + "step": 198 + }, + { + "epoch": 1.01, + "learning_rate": 0.0009069002892785796, + "loss": 0.6096, + "step": 199 + }, + { + "epoch": 1.02, + "learning_rate": 0.0009059835861602853, + "loss": 0.1973, + "step": 200 + }, + { + "epoch": 1.02, + "learning_rate": 0.0009050628600721233, + "loss": 0.4716, + "step": 201 + }, + { + "epoch": 1.03, + "learning_rate": 0.0009041381201377467, + "loss": 0.443, + "step": 202 + }, + { + "epoch": 1.03, + "learning_rate": 0.0009032093755205823, + "loss": 0.4729, + "step": 203 + }, + { + "epoch": 1.04, + "learning_rate": 0.0009022766354237399, + "loss": 0.373, + "step": 204 + }, + { + "epoch": 1.04, + "learning_rate": 0.0009013399090899217, + "loss": 0.4033, + "step": 205 + }, + { + "epoch": 1.05, + "learning_rate": 0.0009003992058013302, + "loss": 0.3282, + "step": 206 + }, + { + "epoch": 1.05, + "learning_rate": 0.0008994545348795758, + "loss": 0.4702, + "step": 207 + }, + { + "epoch": 1.06, + "learning_rate": 0.0008985059056855857, + "loss": 0.5088, + "step": 208 + }, + { + "epoch": 1.06, + "learning_rate": 0.0008975533276195101, + "loss": 0.4294, + "step": 209 + }, + { + "epoch": 1.07, + "learning_rate": 0.0008965968101206292, + "loss": 0.2792, + "step": 210 + }, + { + "epoch": 1.07, + "learning_rate": 0.0008956363626672594, + "loss": 0.3841, + "step": 211 + }, + { + "epoch": 1.08, + "learning_rate": 0.0008946719947766611, + "loss": 0.4744, + "step": 212 + }, + { + "epoch": 1.08, + "learning_rate": 0.0008937037160049415, + "loss": 0.3686, + "step": 213 + }, + { + "epoch": 1.09, + "learning_rate": 0.0008927315359469625, + "loss": 0.6375, + "step": 214 + }, + { + "epoch": 1.09, + "learning_rate": 0.0008917554642362443, + "loss": 0.8962, + "step": 215 + }, + { + "epoch": 1.1, + "learning_rate": 0.0008907755105448703, + "loss": 0.4171, + "step": 216 + }, + { + "epoch": 1.1, + "learning_rate": 0.000889791684583391, + "loss": 0.4826, + "step": 217 + }, + { + "epoch": 1.11, + "learning_rate": 0.0008888039961007281, + "loss": 0.5129, + "step": 218 + }, + { + "epoch": 1.11, + "learning_rate": 0.000887812454884078, + "loss": 0.2824, + "step": 219 + }, + { + "epoch": 1.12, + "learning_rate": 0.0008868170707588142, + "loss": 0.5414, + "step": 220 + }, + { + "epoch": 1.12, + "learning_rate": 0.0008858178535883905, + "loss": 0.4396, + "step": 221 + }, + { + "epoch": 1.13, + "learning_rate": 0.000884814813274243, + "loss": 0.4208, + "step": 222 + }, + { + "epoch": 1.13, + "learning_rate": 0.0008838079597556925, + "loss": 0.3622, + "step": 223 + }, + { + "epoch": 1.14, + "learning_rate": 0.0008827973030098447, + "loss": 0.5095, + "step": 224 + }, + { + "epoch": 1.14, + "learning_rate": 0.0008817828530514931, + "loss": 0.3444, + "step": 225 + }, + { + "epoch": 1.15, + "learning_rate": 0.0008807646199330186, + "loss": 0.3598, + "step": 226 + }, + { + "epoch": 1.15, + "learning_rate": 0.0008797426137442896, + "loss": 0.2523, + "step": 227 + }, + { + "epoch": 1.16, + "learning_rate": 0.0008787168446125638, + "loss": 0.3462, + "step": 228 + }, + { + "epoch": 1.16, + "learning_rate": 0.0008776873227023851, + "loss": 0.4004, + "step": 229 + }, + { + "epoch": 1.17, + "learning_rate": 0.0008766540582154859, + "loss": 0.4201, + "step": 230 + }, + { + "epoch": 1.17, + "learning_rate": 0.0008756170613906833, + "loss": 0.4267, + "step": 231 + }, + { + "epoch": 1.18, + "learning_rate": 0.0008745763425037796, + "loss": 0.4423, + "step": 232 + }, + { + "epoch": 1.18, + "learning_rate": 0.0008735319118674597, + "loss": 0.5584, + "step": 233 + }, + { + "epoch": 1.19, + "learning_rate": 0.0008724837798311882, + "loss": 0.2896, + "step": 234 + }, + { + "epoch": 1.19, + "learning_rate": 0.0008714319567811089, + "loss": 0.4205, + "step": 235 + }, + { + "epoch": 1.2, + "learning_rate": 0.0008703764531399392, + "loss": 0.38, + "step": 236 + }, + { + "epoch": 1.2, + "learning_rate": 0.000869317279366869, + "loss": 0.4093, + "step": 237 + }, + { + "epoch": 1.21, + "learning_rate": 0.0008682544459574561, + "loss": 0.4539, + "step": 238 + }, + { + "epoch": 1.21, + "learning_rate": 0.0008671879634435224, + "loss": 0.4635, + "step": 239 + }, + { + "epoch": 1.22, + "learning_rate": 0.0008661178423930491, + "loss": 0.3336, + "step": 240 + }, + { + "epoch": 1.22, + "learning_rate": 0.0008650440934100728, + "loss": 0.3342, + "step": 241 + }, + { + "epoch": 1.23, + "learning_rate": 0.0008639667271345798, + "loss": 0.4935, + "step": 242 + }, + { + "epoch": 1.23, + "learning_rate": 0.0008628857542424009, + "loss": 0.499, + "step": 243 + }, + { + "epoch": 1.24, + "learning_rate": 0.0008618011854451056, + "loss": 0.6852, + "step": 244 + }, + { + "epoch": 1.24, + "learning_rate": 0.0008607130314898956, + "loss": 0.5297, + "step": 245 + }, + { + "epoch": 1.25, + "learning_rate": 0.000859621303159499, + "loss": 0.3527, + "step": 246 + }, + { + "epoch": 1.25, + "learning_rate": 0.0008585260112720631, + "loss": 0.4286, + "step": 247 + }, + { + "epoch": 1.26, + "learning_rate": 0.0008574271666810469, + "loss": 0.4928, + "step": 248 + }, + { + "epoch": 1.26, + "learning_rate": 0.0008563247802751139, + "loss": 0.5073, + "step": 249 + }, + { + "epoch": 1.27, + "learning_rate": 0.0008552188629780245, + "loss": 0.4953, + "step": 250 + }, + { + "epoch": 1.27, + "learning_rate": 0.0008541094257485265, + "loss": 0.4353, + "step": 251 + }, + { + "epoch": 1.28, + "learning_rate": 0.0008529964795802485, + "loss": 0.4549, + "step": 252 + }, + { + "epoch": 1.28, + "learning_rate": 0.0008518800355015891, + "loss": 0.5555, + "step": 253 + }, + { + "epoch": 1.29, + "learning_rate": 0.0008507601045756085, + "loss": 0.3896, + "step": 254 + }, + { + "epoch": 1.29, + "learning_rate": 0.0008496366978999189, + "loss": 0.3865, + "step": 255 + }, + { + "epoch": 1.3, + "learning_rate": 0.0008485098266065744, + "loss": 0.5022, + "step": 256 + }, + { + "epoch": 1.3, + "learning_rate": 0.0008473795018619604, + "loss": 0.4086, + "step": 257 + }, + { + "epoch": 1.31, + "learning_rate": 0.0008462457348666835, + "loss": 0.4797, + "step": 258 + }, + { + "epoch": 1.31, + "learning_rate": 0.0008451085368554601, + "loss": 0.4812, + "step": 259 + }, + { + "epoch": 1.32, + "learning_rate": 0.0008439679190970051, + "loss": 0.6112, + "step": 260 + }, + { + "epoch": 1.32, + "learning_rate": 0.0008428238928939207, + "loss": 0.3997, + "step": 261 + }, + { + "epoch": 1.33, + "learning_rate": 0.0008416764695825835, + "loss": 0.5654, + "step": 262 + }, + { + "epoch": 1.34, + "learning_rate": 0.0008405256605330332, + "loss": 0.3926, + "step": 263 + }, + { + "epoch": 1.34, + "learning_rate": 0.0008393714771488589, + "loss": 0.4882, + "step": 264 + }, + { + "epoch": 1.35, + "learning_rate": 0.0008382139308670875, + "loss": 0.5966, + "step": 265 + }, + { + "epoch": 1.35, + "learning_rate": 0.0008370530331580686, + "loss": 0.7363, + "step": 266 + }, + { + "epoch": 1.36, + "learning_rate": 0.000835888795525362, + "loss": 0.4962, + "step": 267 + }, + { + "epoch": 1.36, + "learning_rate": 0.0008347212295056239, + "loss": 0.4113, + "step": 268 + }, + { + "epoch": 1.37, + "learning_rate": 0.0008335503466684915, + "loss": 0.6778, + "step": 269 + }, + { + "epoch": 1.37, + "learning_rate": 0.0008323761586164695, + "loss": 0.4551, + "step": 270 + }, + { + "epoch": 1.38, + "learning_rate": 0.0008311986769848141, + "loss": 0.3871, + "step": 271 + }, + { + "epoch": 1.38, + "learning_rate": 0.0008300179134414187, + "loss": 0.521, + "step": 272 + }, + { + "epoch": 1.39, + "learning_rate": 0.0008288338796866977, + "loss": 0.6278, + "step": 273 + }, + { + "epoch": 1.39, + "learning_rate": 0.0008276465874534701, + "loss": 0.3758, + "step": 274 + }, + { + "epoch": 1.4, + "learning_rate": 0.0008264560485068446, + "loss": 0.5227, + "step": 275 + }, + { + "epoch": 1.4, + "learning_rate": 0.0008252622746441021, + "loss": 0.3918, + "step": 276 + }, + { + "epoch": 1.41, + "learning_rate": 0.0008240652776945781, + "loss": 0.377, + "step": 277 + }, + { + "epoch": 1.41, + "learning_rate": 0.0008228650695195472, + "loss": 0.5587, + "step": 278 + }, + { + "epoch": 1.42, + "learning_rate": 0.0008216616620121042, + "loss": 0.4371, + "step": 279 + }, + { + "epoch": 1.42, + "learning_rate": 0.0008204550670970469, + "loss": 0.3701, + "step": 280 + }, + { + "epoch": 1.43, + "learning_rate": 0.0008192452967307575, + "loss": 0.3613, + "step": 281 + }, + { + "epoch": 1.43, + "learning_rate": 0.0008180323629010848, + "loss": 0.5874, + "step": 282 + }, + { + "epoch": 1.44, + "learning_rate": 0.0008168162776272244, + "loss": 0.4882, + "step": 283 + }, + { + "epoch": 1.44, + "learning_rate": 0.0008155970529596006, + "loss": 0.3851, + "step": 284 + }, + { + "epoch": 1.45, + "learning_rate": 0.0008143747009797464, + "loss": 0.49, + "step": 285 + }, + { + "epoch": 1.45, + "learning_rate": 0.000813149233800184, + "loss": 0.584, + "step": 286 + }, + { + "epoch": 1.46, + "learning_rate": 0.0008119206635643044, + "loss": 0.4602, + "step": 287 + }, + { + "epoch": 1.46, + "learning_rate": 0.0008106890024462481, + "loss": 0.5223, + "step": 288 + }, + { + "epoch": 1.47, + "learning_rate": 0.0008094542626507828, + "loss": 0.4446, + "step": 289 + }, + { + "epoch": 1.47, + "learning_rate": 0.0008082164564131844, + "loss": 0.4818, + "step": 290 + }, + { + "epoch": 1.48, + "learning_rate": 0.000806975595999114, + "loss": 0.4525, + "step": 291 + }, + { + "epoch": 1.48, + "learning_rate": 0.0008057316937044977, + "loss": 0.5049, + "step": 292 + }, + { + "epoch": 1.49, + "learning_rate": 0.0008044847618554035, + "loss": 0.4057, + "step": 293 + }, + { + "epoch": 1.49, + "learning_rate": 0.0008032348128079204, + "loss": 0.4718, + "step": 294 + }, + { + "epoch": 1.5, + "learning_rate": 0.0008019818589480352, + "loss": 0.43, + "step": 295 + }, + { + "epoch": 1.5, + "learning_rate": 0.0008007259126915101, + "loss": 0.4611, + "step": 296 + }, + { + "epoch": 1.51, + "learning_rate": 0.0007994669864837594, + "loss": 0.5587, + "step": 297 + }, + { + "epoch": 1.51, + "learning_rate": 0.0007982050927997263, + "loss": 0.7014, + "step": 298 + }, + { + "epoch": 1.52, + "learning_rate": 0.0007969402441437595, + "loss": 0.4874, + "step": 299 + }, + { + "epoch": 1.52, + "learning_rate": 0.0007956724530494887, + "loss": 0.6031, + "step": 300 + }, + { + "epoch": 1.53, + "learning_rate": 0.0007944017320797012, + "loss": 0.4922, + "step": 301 + }, + { + "epoch": 1.53, + "learning_rate": 0.0007931280938262169, + "loss": 0.5301, + "step": 302 + }, + { + "epoch": 1.54, + "learning_rate": 0.0007918515509097633, + "loss": 0.5914, + "step": 303 + }, + { + "epoch": 1.54, + "learning_rate": 0.0007905721159798514, + "loss": 0.5355, + "step": 304 + }, + { + "epoch": 1.55, + "learning_rate": 0.000789289801714649, + "loss": 0.9378, + "step": 305 + }, + { + "epoch": 1.55, + "learning_rate": 0.0007880046208208562, + "loss": 0.5175, + "step": 306 + }, + { + "epoch": 1.56, + "learning_rate": 0.0007867165860335792, + "loss": 0.4921, + "step": 307 + }, + { + "epoch": 1.56, + "learning_rate": 0.0007854257101162037, + "loss": 0.3571, + "step": 308 + }, + { + "epoch": 1.57, + "learning_rate": 0.0007841320058602688, + "loss": 0.4316, + "step": 309 + }, + { + "epoch": 1.57, + "learning_rate": 0.0007828354860853399, + "loss": 0.7104, + "step": 310 + }, + { + "epoch": 1.58, + "learning_rate": 0.0007815361636388827, + "loss": 0.5402, + "step": 311 + }, + { + "epoch": 1.58, + "learning_rate": 0.0007802340513961342, + "loss": 0.451, + "step": 312 + }, + { + "epoch": 1.59, + "learning_rate": 0.0007789291622599765, + "loss": 0.4272, + "step": 313 + }, + { + "epoch": 1.59, + "learning_rate": 0.0007776215091608085, + "loss": 0.6552, + "step": 314 + }, + { + "epoch": 1.6, + "learning_rate": 0.0007763111050564178, + "loss": 0.5025, + "step": 315 + }, + { + "epoch": 1.6, + "learning_rate": 0.0007749979629318516, + "loss": 0.4279, + "step": 316 + }, + { + "epoch": 1.61, + "learning_rate": 0.0007736820957992895, + "loss": 0.5845, + "step": 317 + }, + { + "epoch": 1.61, + "learning_rate": 0.0007723635166979133, + "loss": 0.4912, + "step": 318 + }, + { + "epoch": 1.62, + "learning_rate": 0.0007710422386937784, + "loss": 0.4654, + "step": 319 + }, + { + "epoch": 1.62, + "learning_rate": 0.0007697182748796841, + "loss": 0.3999, + "step": 320 + }, + { + "epoch": 1.63, + "learning_rate": 0.0007683916383750436, + "loss": 0.584, + "step": 321 + }, + { + "epoch": 1.63, + "learning_rate": 0.0007670623423257548, + "loss": 0.4966, + "step": 322 + }, + { + "epoch": 1.64, + "learning_rate": 0.0007657303999040692, + "loss": 0.3419, + "step": 323 + }, + { + "epoch": 1.64, + "learning_rate": 0.0007643958243084619, + "loss": 0.5106, + "step": 324 + }, + { + "epoch": 1.65, + "learning_rate": 0.0007630586287635007, + "loss": 0.5535, + "step": 325 + }, + { + "epoch": 1.65, + "learning_rate": 0.0007617188265197149, + "loss": 0.4968, + "step": 326 + }, + { + "epoch": 1.66, + "learning_rate": 0.0007603764308534636, + "loss": 0.4221, + "step": 327 + }, + { + "epoch": 1.66, + "learning_rate": 0.0007590314550668054, + "loss": 0.5194, + "step": 328 + }, + { + "epoch": 1.67, + "learning_rate": 0.0007576839124873654, + "loss": 0.4809, + "step": 329 + }, + { + "epoch": 1.68, + "learning_rate": 0.0007563338164682036, + "loss": 0.4809, + "step": 330 + }, + { + "epoch": 1.68, + "learning_rate": 0.0007549811803876825, + "loss": 0.4812, + "step": 331 + }, + { + "epoch": 1.69, + "learning_rate": 0.0007536260176493348, + "loss": 0.4273, + "step": 332 + }, + { + "epoch": 1.69, + "learning_rate": 0.00075226834168173, + "loss": 0.4738, + "step": 333 + }, + { + "epoch": 1.7, + "learning_rate": 0.0007509081659383416, + "loss": 0.448, + "step": 334 + }, + { + "epoch": 1.7, + "learning_rate": 0.0007495455038974146, + "loss": 0.4226, + "step": 335 + }, + { + "epoch": 1.71, + "learning_rate": 0.0007481803690618304, + "loss": 0.4618, + "step": 336 + }, + { + "epoch": 1.71, + "learning_rate": 0.000746812774958974, + "loss": 0.4488, + "step": 337 + }, + { + "epoch": 1.72, + "learning_rate": 0.0007454427351405999, + "loss": 0.4695, + "step": 338 + }, + { + "epoch": 1.72, + "learning_rate": 0.0007440702631826976, + "loss": 0.3598, + "step": 339 + }, + { + "epoch": 1.73, + "learning_rate": 0.0007426953726853573, + "loss": 0.4688, + "step": 340 + }, + { + "epoch": 1.73, + "learning_rate": 0.0007413180772726348, + "loss": 0.3983, + "step": 341 + }, + { + "epoch": 1.74, + "learning_rate": 0.0007399383905924165, + "loss": 0.5252, + "step": 342 + }, + { + "epoch": 1.74, + "learning_rate": 0.0007385563263162847, + "loss": 0.5176, + "step": 343 + }, + { + "epoch": 1.75, + "learning_rate": 0.0007371718981393814, + "loss": 0.6202, + "step": 344 + }, + { + "epoch": 1.75, + "learning_rate": 0.0007357851197802735, + "loss": 0.3895, + "step": 345 + }, + { + "epoch": 1.76, + "learning_rate": 0.0007343960049808155, + "loss": 0.4626, + "step": 346 + }, + { + "epoch": 1.76, + "learning_rate": 0.0007330045675060148, + "loss": 0.4189, + "step": 347 + }, + { + "epoch": 1.77, + "learning_rate": 0.0007316108211438946, + "loss": 0.3928, + "step": 348 + }, + { + "epoch": 1.77, + "learning_rate": 0.0007302147797053569, + "loss": 0.6127, + "step": 349 + }, + { + "epoch": 1.78, + "learning_rate": 0.0007288164570240462, + "loss": 0.4124, + "step": 350 + }, + { + "epoch": 1.78, + "learning_rate": 0.0007274158669562126, + "loss": 0.4845, + "step": 351 + }, + { + "epoch": 1.79, + "learning_rate": 0.0007260130233805739, + "loss": 0.5578, + "step": 352 + }, + { + "epoch": 1.79, + "learning_rate": 0.0007246079401981784, + "loss": 0.9514, + "step": 353 + }, + { + "epoch": 1.8, + "learning_rate": 0.0007232006313322668, + "loss": 0.4722, + "step": 354 + }, + { + "epoch": 1.8, + "learning_rate": 0.0007217911107281352, + "loss": 0.462, + "step": 355 + }, + { + "epoch": 1.81, + "learning_rate": 0.0007203793923529956, + "loss": 0.5369, + "step": 356 + }, + { + "epoch": 1.81, + "learning_rate": 0.0007189654901958386, + "loss": 0.5005, + "step": 357 + }, + { + "epoch": 1.82, + "learning_rate": 0.0007175494182672939, + "loss": 0.5964, + "step": 358 + }, + { + "epoch": 1.82, + "learning_rate": 0.0007161311905994922, + "loss": 0.4215, + "step": 359 + }, + { + "epoch": 1.83, + "learning_rate": 0.0007147108212459257, + "loss": 0.5255, + "step": 360 + }, + { + "epoch": 1.83, + "learning_rate": 0.000713288324281309, + "loss": 0.4609, + "step": 361 + }, + { + "epoch": 1.84, + "learning_rate": 0.0007118637138014396, + "loss": 0.5004, + "step": 362 + }, + { + "epoch": 1.84, + "learning_rate": 0.0007104370039230583, + "loss": 0.5112, + "step": 363 + }, + { + "epoch": 1.85, + "learning_rate": 0.0007090082087837092, + "loss": 0.572, + "step": 364 + }, + { + "epoch": 1.85, + "learning_rate": 0.0007075773425415994, + "loss": 0.3962, + "step": 365 + }, + { + "epoch": 1.86, + "learning_rate": 0.0007061444193754596, + "loss": 0.5081, + "step": 366 + }, + { + "epoch": 1.86, + "learning_rate": 0.0007047094534844022, + "loss": 0.4317, + "step": 367 + }, + { + "epoch": 1.87, + "learning_rate": 0.0007032724590877821, + "loss": 0.434, + "step": 368 + }, + { + "epoch": 1.87, + "learning_rate": 0.0007018334504250545, + "loss": 0.424, + "step": 369 + }, + { + "epoch": 1.88, + "learning_rate": 0.0007003924417556343, + "loss": 0.5263, + "step": 370 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006989494473587554, + "loss": 0.3515, + "step": 371 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006975044815333281, + "loss": 0.5854, + "step": 372 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006960575585977984, + "loss": 0.5951, + "step": 373 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006946086928900053, + "loss": 0.5526, + "step": 374 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006931578987670395, + "loss": 0.4395, + "step": 375 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006917051906051005, + "loss": 0.4951, + "step": 376 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006902505827993541, + "loss": 0.5379, + "step": 377 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006887940897637908, + "loss": 0.4688, + "step": 378 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006873357259310815, + "loss": 0.422, + "step": 379 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006858755057524354, + "loss": 0.5442, + "step": 380 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006844134436974567, + "loss": 0.5796, + "step": 381 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006829495542540013, + "loss": 0.3116, + "step": 382 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006814838519280324, + "loss": 0.5884, + "step": 383 + }, + { + "epoch": 1.95, + "learning_rate": 0.000680016351243478, + "loss": 0.4409, + "step": 384 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006785470667420861, + "loss": 0.5072, + "step": 385 + }, + { + "epoch": 1.96, + "learning_rate": 0.000677076012983281, + "loss": 0.6952, + "step": 386 + }, + { + "epoch": 1.96, + "learning_rate": 0.000675603204544019, + "loss": 0.5757, + "step": 387 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006741286560186436, + "loss": 0.4874, + "step": 388 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006726523820187413, + "loss": 0.6527, + "step": 389 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006711743971729967, + "loss": 0.3801, + "step": 390 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006696947161270476, + "loss": 0.5924, + "step": 391 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006682133535433393, + "loss": 0.4358, + "step": 392 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006667303241009804, + "loss": 0.3506, + "step": 393 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006652456424955964, + "loss": 0.4905, + "step": 394 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006637593234391843, + "loss": 0.207, + "step": 395 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006622713816599673, + "loss": 0.2183, + "step": 396 + }, + { + "epoch": 2.02, + "learning_rate": 0.0006607818319022481, + "loss": 0.2478, + "step": 397 + }, + { + "epoch": 2.02, + "learning_rate": 0.0006592906889262632, + "loss": 0.2421, + "step": 398 + }, + { + "epoch": 2.03, + "learning_rate": 0.0006577979675080369, + "loss": 0.2596, + "step": 399 + }, + { + "epoch": 2.03, + "learning_rate": 0.0006563036824392345, + "loss": 0.1455, + "step": 400 + }, + { + "epoch": 2.04, + "learning_rate": 0.0006548078485270152, + "loss": 0.2416, + "step": 401 + }, + { + "epoch": 2.04, + "learning_rate": 0.0006533104805938873, + "loss": 0.1597, + "step": 402 + }, + { + "epoch": 2.05, + "learning_rate": 0.0006518115934775586, + "loss": 0.2274, + "step": 403 + }, + { + "epoch": 2.05, + "learning_rate": 0.0006503112020307916, + "loss": 0.2043, + "step": 404 + }, + { + "epoch": 2.06, + "learning_rate": 0.0006488093211212554, + "loss": 0.1718, + "step": 405 + }, + { + "epoch": 2.06, + "learning_rate": 0.0006473059656313782, + "loss": 0.2003, + "step": 406 + }, + { + "epoch": 2.07, + "learning_rate": 0.0006458011504582005, + "loss": 0.2682, + "step": 407 + }, + { + "epoch": 2.07, + "learning_rate": 0.0006442948905132266, + "loss": 0.3159, + "step": 408 + }, + { + "epoch": 2.08, + "learning_rate": 0.0006427872007222777, + "loss": 0.1926, + "step": 409 + }, + { + "epoch": 2.08, + "learning_rate": 0.0006412780960253436, + "loss": 0.1705, + "step": 410 + }, + { + "epoch": 2.09, + "learning_rate": 0.0006397675913764345, + "loss": 0.244, + "step": 411 + }, + { + "epoch": 2.09, + "learning_rate": 0.0006382557017434331, + "loss": 0.263, + "step": 412 + }, + { + "epoch": 2.1, + "learning_rate": 0.0006367424421079462, + "loss": 0.1458, + "step": 413 + }, + { + "epoch": 2.1, + "learning_rate": 0.0006352278274651562, + "loss": 0.2241, + "step": 414 + }, + { + "epoch": 2.11, + "learning_rate": 0.0006337118728236722, + "loss": 0.1758, + "step": 415 + }, + { + "epoch": 2.11, + "learning_rate": 0.0006321945932053822, + "loss": 0.2477, + "step": 416 + }, + { + "epoch": 2.12, + "learning_rate": 0.0006306760036453034, + "loss": 0.2506, + "step": 417 + }, + { + "epoch": 2.12, + "learning_rate": 0.0006291561191914333, + "loss": 0.1463, + "step": 418 + }, + { + "epoch": 2.13, + "learning_rate": 0.0006276349549046008, + "loss": 0.2307, + "step": 419 + }, + { + "epoch": 2.13, + "learning_rate": 0.0006261125258583171, + "loss": 0.2191, + "step": 420 + }, + { + "epoch": 2.14, + "learning_rate": 0.0006245888471386262, + "loss": 0.168, + "step": 421 + }, + { + "epoch": 2.14, + "learning_rate": 0.0006230639338439548, + "loss": 0.2035, + "step": 422 + }, + { + "epoch": 2.15, + "learning_rate": 0.000621537801084964, + "loss": 0.1725, + "step": 423 + }, + { + "epoch": 2.15, + "learning_rate": 0.0006200104639843984, + "loss": 0.2434, + "step": 424 + }, + { + "epoch": 2.16, + "learning_rate": 0.0006184819376769364, + "loss": 0.156, + "step": 425 + }, + { + "epoch": 2.16, + "learning_rate": 0.0006169522373090413, + "loss": 0.2614, + "step": 426 + }, + { + "epoch": 2.17, + "learning_rate": 0.0006154213780388092, + "loss": 0.1728, + "step": 427 + }, + { + "epoch": 2.17, + "learning_rate": 0.0006138893750358212, + "loss": 0.4142, + "step": 428 + }, + { + "epoch": 2.18, + "learning_rate": 0.0006123562434809912, + "loss": 0.2358, + "step": 429 + }, + { + "epoch": 2.18, + "learning_rate": 0.0006108219985664161, + "loss": 0.1511, + "step": 430 + }, + { + "epoch": 2.19, + "learning_rate": 0.0006092866554952256, + "loss": 0.2079, + "step": 431 + }, + { + "epoch": 2.19, + "learning_rate": 0.0006077502294814311, + "loss": 0.2778, + "step": 432 + }, + { + "epoch": 2.2, + "learning_rate": 0.000606212735749775, + "loss": 0.1745, + "step": 433 + }, + { + "epoch": 2.2, + "learning_rate": 0.0006046741895355802, + "loss": 0.2087, + "step": 434 + }, + { + "epoch": 2.21, + "learning_rate": 0.0006031346060845986, + "loss": 0.3661, + "step": 435 + }, + { + "epoch": 2.21, + "learning_rate": 0.0006015940006528601, + "loss": 0.2497, + "step": 436 + }, + { + "epoch": 2.22, + "learning_rate": 0.0006000523885065223, + "loss": 0.1748, + "step": 437 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005985097849217179, + "loss": 0.2426, + "step": 438 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005969662051844041, + "loss": 0.2053, + "step": 439 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005954216645902108, + "loss": 0.2444, + "step": 440 + }, + { + "epoch": 2.24, + "learning_rate": 0.00059387617844429, + "loss": 0.2316, + "step": 441 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005923297620611623, + "loss": 0.3341, + "step": 442 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005907824307645668, + "loss": 0.2375, + "step": 443 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005892341998873089, + "loss": 0.1812, + "step": 444 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005876850847711072, + "loss": 0.1778, + "step": 445 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005861351007664434, + "loss": 0.2015, + "step": 446 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005845842632324087, + "loss": 0.2272, + "step": 447 + }, + { + "epoch": 2.27, + "learning_rate": 0.000583032587536552, + "loss": 0.2465, + "step": 448 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005814800890547278, + "loss": 0.2765, + "step": 449 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005799267831709441, + "loss": 0.2826, + "step": 450 + }, + { + "epoch": 2.29, + "learning_rate": 0.000578372685277209, + "loss": 0.2576, + "step": 451 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005768178107733791, + "loss": 0.3645, + "step": 452 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005752621750670068, + "loss": 0.1209, + "step": 453 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005737057935731867, + "loss": 0.2813, + "step": 454 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005721486817144044, + "loss": 0.2002, + "step": 455 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005705908549203824, + "loss": 0.249, + "step": 456 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005690323286279274, + "loss": 0.2097, + "step": 457 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005674731182807781, + "loss": 0.2456, + "step": 458 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005659132393294514, + "loss": 0.2035, + "step": 459 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005643527072310891, + "loss": 0.1656, + "step": 460 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005627915374493061, + "loss": 0.334, + "step": 461 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005612297454540352, + "loss": 0.171, + "step": 462 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005596673467213756, + "loss": 0.1982, + "step": 463 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005581043567334383, + "loss": 0.1921, + "step": 464 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005565407909781934, + "loss": 0.2609, + "step": 465 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005549766649493165, + "loss": 0.2503, + "step": 466 + }, + { + "epoch": 2.37, + "learning_rate": 0.000553411994146035, + "loss": 0.2269, + "step": 467 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005518467940729739, + "loss": 0.1349, + "step": 468 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005502810802400039, + "loss": 0.2037, + "step": 469 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005487148681620861, + "loss": 0.1903, + "step": 470 + }, + { + "epoch": 2.39, + "learning_rate": 0.000547148173359119, + "loss": 0.187, + "step": 471 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005455810113557839, + "loss": 0.211, + "step": 472 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005440133976813926, + "loss": 0.2496, + "step": 473 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005424453478697321, + "loss": 0.2057, + "step": 474 + }, + { + "epoch": 2.41, + "learning_rate": 0.000540876877458911, + "loss": 0.1973, + "step": 475 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005393080019912061, + "loss": 0.232, + "step": 476 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005377387370129079, + "loss": 0.1924, + "step": 477 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005361690980741663, + "loss": 0.2347, + "step": 478 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005345991007288371, + "loss": 0.2134, + "step": 479 + }, + { + "epoch": 2.44, + "learning_rate": 0.000533028760534328, + "loss": 0.2552, + "step": 480 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005314580930514431, + "loss": 0.3036, + "step": 481 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005298871138442307, + "loss": 0.1862, + "step": 482 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005283158384798275, + "loss": 0.3097, + "step": 483 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005267442825283048, + "loss": 0.2764, + "step": 484 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005251724615625145, + "loss": 0.2332, + "step": 485 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005236003911579344, + "loss": 0.1589, + "step": 486 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005220280868925145, + "loss": 0.1943, + "step": 487 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005204555643465215, + "loss": 0.1713, + "step": 488 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005188828391023856, + "loss": 0.2151, + "step": 489 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005173099267445451, + "loss": 0.1708, + "step": 490 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005157368428592932, + "loss": 0.1367, + "step": 491 + }, + { + "epoch": 2.5, + "learning_rate": 0.000514163603034622, + "loss": 0.2488, + "step": 492 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005125902228600693, + "loss": 0.1392, + "step": 493 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005110167179265636, + "loss": 0.2407, + "step": 494 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005094431038262692, + "loss": 0.2323, + "step": 495 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005078693961524329, + "loss": 0.236, + "step": 496 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005062956104992285, + "loss": 0.1665, + "step": 497 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005047217624616019, + "loss": 0.2289, + "step": 498 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005031478676351178, + "loss": 0.6418, + "step": 499 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005015739416158049, + "loss": 0.1473, + "step": 500 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005, + "loss": 0.2873, + "step": 501 + }, + { + "epoch": 2.55, + "learning_rate": 0.0004984260583841952, + "loss": 0.1745, + "step": 502 + }, + { + "epoch": 2.55, + "learning_rate": 0.0004968521323648822, + "loss": 0.2458, + "step": 503 + }, + { + "epoch": 2.56, + "learning_rate": 0.0004952782375383984, + "loss": 0.2808, + "step": 504 + }, + { + "epoch": 2.56, + "learning_rate": 0.0004937043895007717, + "loss": 0.1909, + "step": 505 + }, + { + "epoch": 2.57, + "learning_rate": 0.0004921306038475671, + "loss": 0.2185, + "step": 506 + }, + { + "epoch": 2.57, + "learning_rate": 0.0004905568961737309, + "loss": 0.236, + "step": 507 + }, + { + "epoch": 2.58, + "learning_rate": 0.0004889832820734367, + "loss": 0.2238, + "step": 508 + }, + { + "epoch": 2.58, + "learning_rate": 0.0004874097771399308, + "loss": 0.207, + "step": 509 + }, + { + "epoch": 2.59, + "learning_rate": 0.0004858363969653781, + "loss": 0.2601, + "step": 510 + }, + { + "epoch": 2.59, + "learning_rate": 0.00048426315714070683, + "loss": 0.2049, + "step": 511 + }, + { + "epoch": 2.6, + "learning_rate": 0.00048269007325545506, + "loss": 0.2763, + "step": 512 + }, + { + "epoch": 2.6, + "learning_rate": 0.00048111716089761455, + "loss": 0.2787, + "step": 513 + }, + { + "epoch": 2.61, + "learning_rate": 0.00047954443565347867, + "loss": 0.2045, + "step": 514 + }, + { + "epoch": 2.61, + "learning_rate": 0.00047797191310748567, + "loss": 0.2062, + "step": 515 + }, + { + "epoch": 2.62, + "learning_rate": 0.00047639960884206576, + "loss": 0.1595, + "step": 516 + }, + { + "epoch": 2.62, + "learning_rate": 0.0004748275384374856, + "loss": 0.1786, + "step": 517 + }, + { + "epoch": 2.63, + "learning_rate": 0.00047325571747169545, + "loss": 0.2422, + "step": 518 + }, + { + "epoch": 2.63, + "learning_rate": 0.00047168416152017255, + "loss": 0.1506, + "step": 519 + }, + { + "epoch": 2.64, + "learning_rate": 0.0004701128861557694, + "loss": 0.1656, + "step": 520 + }, + { + "epoch": 2.64, + "learning_rate": 0.0004685419069485569, + "loss": 0.26, + "step": 521 + }, + { + "epoch": 2.65, + "learning_rate": 0.0004669712394656723, + "loss": 0.3045, + "step": 522 + }, + { + "epoch": 2.65, + "learning_rate": 0.00046540089927116305, + "loss": 0.2051, + "step": 523 + }, + { + "epoch": 2.66, + "learning_rate": 0.00046383090192583397, + "loss": 0.198, + "step": 524 + }, + { + "epoch": 2.66, + "learning_rate": 0.0004622612629870923, + "loss": 0.2119, + "step": 525 + }, + { + "epoch": 2.67, + "learning_rate": 0.00046069199800879404, + "loss": 0.2543, + "step": 526 + }, + { + "epoch": 2.68, + "learning_rate": 0.00045912312254108907, + "loss": 0.2439, + "step": 527 + }, + { + "epoch": 2.68, + "learning_rate": 0.00045755465213026807, + "loss": 0.1628, + "step": 528 + }, + { + "epoch": 2.69, + "learning_rate": 0.00045598660231860745, + "loss": 0.2464, + "step": 529 + }, + { + "epoch": 2.69, + "learning_rate": 0.0004544189886442162, + "loss": 0.1534, + "step": 530 + }, + { + "epoch": 2.7, + "learning_rate": 0.0004528518266408811, + "loss": 0.2754, + "step": 531 + }, + { + "epoch": 2.7, + "learning_rate": 0.00045128513183791385, + "loss": 0.2381, + "step": 532 + }, + { + "epoch": 2.71, + "learning_rate": 0.000449718919759996, + "loss": 0.3008, + "step": 533 + }, + { + "epoch": 2.71, + "learning_rate": 0.0004481532059270262, + "loss": 0.3641, + "step": 534 + }, + { + "epoch": 2.72, + "learning_rate": 0.0004465880058539652, + "loss": 0.3051, + "step": 535 + }, + { + "epoch": 2.72, + "learning_rate": 0.00044502333505068355, + "loss": 0.2982, + "step": 536 + }, + { + "epoch": 2.73, + "learning_rate": 0.0004434592090218065, + "loss": 0.1558, + "step": 537 + }, + { + "epoch": 2.73, + "learning_rate": 0.0004418956432665618, + "loss": 0.2677, + "step": 538 + }, + { + "epoch": 2.74, + "learning_rate": 0.0004403326532786245, + "loss": 0.2461, + "step": 539 + }, + { + "epoch": 2.74, + "learning_rate": 0.00043877025454596493, + "loss": 0.2472, + "step": 540 + }, + { + "epoch": 2.75, + "learning_rate": 0.000437208462550694, + "loss": 0.2212, + "step": 541 + }, + { + "epoch": 2.75, + "learning_rate": 0.0004356472927689109, + "loss": 0.198, + "step": 542 + }, + { + "epoch": 2.76, + "learning_rate": 0.00043408676067054866, + "loss": 0.2927, + "step": 543 + }, + { + "epoch": 2.76, + "learning_rate": 0.000432526881719222, + "loss": 0.2636, + "step": 544 + }, + { + "epoch": 2.77, + "learning_rate": 0.00043096767137207256, + "loss": 0.359, + "step": 545 + }, + { + "epoch": 2.77, + "learning_rate": 0.0004294091450796177, + "loss": 0.2385, + "step": 546 + }, + { + "epoch": 2.78, + "learning_rate": 0.0004278513182855956, + "loss": 0.136, + "step": 547 + }, + { + "epoch": 2.78, + "learning_rate": 0.0004262942064268134, + "loss": 0.1927, + "step": 548 + }, + { + "epoch": 2.79, + "learning_rate": 0.00042473782493299324, + "loss": 0.2469, + "step": 549 + }, + { + "epoch": 2.79, + "learning_rate": 0.000423182189226621, + "loss": 0.252, + "step": 550 + }, + { + "epoch": 2.8, + "learning_rate": 0.00042162731472279095, + "loss": 0.2133, + "step": 551 + }, + { + "epoch": 2.8, + "learning_rate": 0.000420073216829056, + "loss": 0.2453, + "step": 552 + }, + { + "epoch": 2.81, + "learning_rate": 0.0004185199109452721, + "loss": 0.274, + "step": 553 + }, + { + "epoch": 2.81, + "learning_rate": 0.00041696741246344813, + "loss": 0.2342, + "step": 554 + }, + { + "epoch": 2.82, + "learning_rate": 0.00041541573676759125, + "loss": 0.17, + "step": 555 + }, + { + "epoch": 2.82, + "learning_rate": 0.00041386489923355666, + "loss": 0.234, + "step": 556 + }, + { + "epoch": 2.83, + "learning_rate": 0.000412314915228893, + "loss": 0.1674, + "step": 557 + }, + { + "epoch": 2.83, + "learning_rate": 0.00041076580011269127, + "loss": 0.2432, + "step": 558 + }, + { + "epoch": 2.84, + "learning_rate": 0.0004092175692354333, + "loss": 0.2297, + "step": 559 + }, + { + "epoch": 2.84, + "learning_rate": 0.00040767023793883785, + "loss": 0.2347, + "step": 560 + }, + { + "epoch": 2.85, + "learning_rate": 0.0004061238215557103, + "loss": 0.2065, + "step": 561 + }, + { + "epoch": 2.85, + "learning_rate": 0.0004045783354097893, + "loss": 0.1912, + "step": 562 + }, + { + "epoch": 2.86, + "learning_rate": 0.0004030337948155962, + "loss": 0.2439, + "step": 563 + }, + { + "epoch": 2.86, + "learning_rate": 0.00040149021507828223, + "loss": 0.1611, + "step": 564 + }, + { + "epoch": 2.87, + "learning_rate": 0.0003999476114934778, + "loss": 0.2625, + "step": 565 + }, + { + "epoch": 2.87, + "learning_rate": 0.0003984059993471399, + "loss": 0.1553, + "step": 566 + }, + { + "epoch": 2.88, + "learning_rate": 0.0003968653939154016, + "loss": 0.2644, + "step": 567 + }, + { + "epoch": 2.88, + "learning_rate": 0.00039532581046441994, + "loss": 0.1631, + "step": 568 + }, + { + "epoch": 2.89, + "learning_rate": 0.00039378726425022515, + "loss": 0.2319, + "step": 569 + }, + { + "epoch": 2.89, + "learning_rate": 0.00039224977051856904, + "loss": 0.1823, + "step": 570 + }, + { + "epoch": 2.9, + "learning_rate": 0.0003907133445047747, + "loss": 0.1732, + "step": 571 + }, + { + "epoch": 2.9, + "learning_rate": 0.000389178001433584, + "loss": 0.1973, + "step": 572 + }, + { + "epoch": 2.91, + "learning_rate": 0.000387643756519009, + "loss": 0.2714, + "step": 573 + }, + { + "epoch": 2.91, + "learning_rate": 0.00038611062496417893, + "loss": 0.2124, + "step": 574 + }, + { + "epoch": 2.92, + "learning_rate": 0.000384578621961191, + "loss": 0.3089, + "step": 575 + }, + { + "epoch": 2.92, + "learning_rate": 0.00038304776269095886, + "loss": 0.2345, + "step": 576 + }, + { + "epoch": 2.93, + "learning_rate": 0.0003815180623230637, + "loss": 0.1838, + "step": 577 + }, + { + "epoch": 2.93, + "learning_rate": 0.0003799895360156017, + "loss": 0.1751, + "step": 578 + }, + { + "epoch": 2.94, + "learning_rate": 0.00037846219891503606, + "loss": 0.1985, + "step": 579 + }, + { + "epoch": 2.94, + "learning_rate": 0.00037693606615604524, + "loss": 0.2054, + "step": 580 + }, + { + "epoch": 2.95, + "learning_rate": 0.00037541115286137403, + "loss": 0.3287, + "step": 581 + }, + { + "epoch": 2.95, + "learning_rate": 0.00037388747414168294, + "loss": 0.1547, + "step": 582 + }, + { + "epoch": 2.96, + "learning_rate": 0.00037236504509539936, + "loss": 0.2142, + "step": 583 + }, + { + "epoch": 2.96, + "learning_rate": 0.0003708438808085668, + "loss": 0.2749, + "step": 584 + }, + { + "epoch": 2.97, + "learning_rate": 0.00036932399635469673, + "loss": 0.251, + "step": 585 + }, + { + "epoch": 2.97, + "learning_rate": 0.0003678054067946178, + "loss": 0.3888, + "step": 586 + }, + { + "epoch": 2.98, + "learning_rate": 0.00036628812717632794, + "loss": 0.1405, + "step": 587 + }, + { + "epoch": 2.98, + "learning_rate": 0.00036477217253484397, + "loss": 0.2749, + "step": 588 + }, + { + "epoch": 2.99, + "learning_rate": 0.000363257557892054, + "loss": 0.1246, + "step": 589 + }, + { + "epoch": 2.99, + "learning_rate": 0.00036174429825656685, + "loss": 0.2626, + "step": 590 + }, + { + "epoch": 3.0, + "learning_rate": 0.0003602324086235655, + "loss": 0.1572, + "step": 591 + }, + { + "epoch": 3.01, + "learning_rate": 0.00035872190397465636, + "loss": 0.0793, + "step": 592 + }, + { + "epoch": 3.01, + "learning_rate": 0.00035721279927772233, + "loss": 0.0366, + "step": 593 + }, + { + "epoch": 3.02, + "learning_rate": 0.0003557051094867735, + "loss": 0.0661, + "step": 594 + }, + { + "epoch": 3.02, + "learning_rate": 0.0003541988495417997, + "loss": 0.0447, + "step": 595 + }, + { + "epoch": 3.03, + "learning_rate": 0.00035269403436862174, + "loss": 0.0356, + "step": 596 + }, + { + "epoch": 3.03, + "learning_rate": 0.0003511906788787447, + "loss": 0.0507, + "step": 597 + }, + { + "epoch": 3.04, + "learning_rate": 0.00034968879796920836, + "loss": 0.0461, + "step": 598 + }, + { + "epoch": 3.04, + "learning_rate": 0.0003481884065224415, + "loss": 0.0855, + "step": 599 + }, + { + "epoch": 3.05, + "learning_rate": 0.00034668951940611275, + "loss": 0.0666, + "step": 600 + }, + { + "epoch": 3.05, + "learning_rate": 0.0003451921514729848, + "loss": 0.0708, + "step": 601 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003436963175607656, + "loss": 0.0662, + "step": 602 + }, + { + "epoch": 3.06, + "learning_rate": 0.00034220203249196317, + "loss": 0.0593, + "step": 603 + }, + { + "epoch": 3.07, + "learning_rate": 0.0003407093110737368, + "loss": 0.0654, + "step": 604 + }, + { + "epoch": 3.07, + "learning_rate": 0.00033921816809775204, + "loss": 0.0445, + "step": 605 + }, + { + "epoch": 3.08, + "learning_rate": 0.00033772861834003274, + "loss": 0.0355, + "step": 606 + }, + { + "epoch": 3.08, + "learning_rate": 0.00033624067656081575, + "loss": 0.0426, + "step": 607 + }, + { + "epoch": 3.09, + "learning_rate": 0.0003347543575044035, + "loss": 0.0438, + "step": 608 + }, + { + "epoch": 3.09, + "learning_rate": 0.00033326967589901967, + "loss": 0.0836, + "step": 609 + }, + { + "epoch": 3.1, + "learning_rate": 0.0003317866464566607, + "loss": 0.0479, + "step": 610 + }, + { + "epoch": 3.1, + "learning_rate": 0.0003303052838729525, + "loss": 0.1101, + "step": 611 + }, + { + "epoch": 3.11, + "learning_rate": 0.00032882560282700336, + "loss": 0.0661, + "step": 612 + }, + { + "epoch": 3.11, + "learning_rate": 0.0003273476179812588, + "loss": 0.0452, + "step": 613 + }, + { + "epoch": 3.12, + "learning_rate": 0.0003258713439813565, + "loss": 0.0338, + "step": 614 + }, + { + "epoch": 3.12, + "learning_rate": 0.0003243967954559811, + "loss": 0.0277, + "step": 615 + }, + { + "epoch": 3.13, + "learning_rate": 0.0003229239870167191, + "loss": 0.0559, + "step": 616 + }, + { + "epoch": 3.13, + "learning_rate": 0.00032145293325791394, + "loss": 0.0732, + "step": 617 + }, + { + "epoch": 3.14, + "learning_rate": 0.0003199836487565222, + "loss": 0.123, + "step": 618 + }, + { + "epoch": 3.14, + "learning_rate": 0.0003185161480719677, + "loss": 0.0501, + "step": 619 + }, + { + "epoch": 3.15, + "learning_rate": 0.0003170504457459989, + "loss": 0.0836, + "step": 620 + }, + { + "epoch": 3.15, + "learning_rate": 0.0003155865563025433, + "loss": 0.0464, + "step": 621 + }, + { + "epoch": 3.16, + "learning_rate": 0.00031412449424756474, + "loss": 0.0927, + "step": 622 + }, + { + "epoch": 3.16, + "learning_rate": 0.0003126642740689186, + "loss": 0.0291, + "step": 623 + }, + { + "epoch": 3.17, + "learning_rate": 0.00031120591023620927, + "loss": 0.0793, + "step": 624 + }, + { + "epoch": 3.17, + "learning_rate": 0.0003097494172006459, + "loss": 0.0812, + "step": 625 + }, + { + "epoch": 3.18, + "learning_rate": 0.00030829480939489965, + "loss": 0.0852, + "step": 626 + }, + { + "epoch": 3.18, + "learning_rate": 0.00030684210123296053, + "loss": 0.1087, + "step": 627 + }, + { + "epoch": 3.19, + "learning_rate": 0.00030539130710999473, + "loss": 0.074, + "step": 628 + }, + { + "epoch": 3.19, + "learning_rate": 0.0003039424414022016, + "loss": 0.0655, + "step": 629 + }, + { + "epoch": 3.2, + "learning_rate": 0.00030249551846667206, + "loss": 0.0622, + "step": 630 + }, + { + "epoch": 3.2, + "learning_rate": 0.0003010505526412447, + "loss": 0.0373, + "step": 631 + }, + { + "epoch": 3.21, + "learning_rate": 0.0002996075582443658, + "loss": 0.054, + "step": 632 + }, + { + "epoch": 3.21, + "learning_rate": 0.0002981665495749457, + "loss": 0.0741, + "step": 633 + }, + { + "epoch": 3.22, + "learning_rate": 0.00029672754091221806, + "loss": 0.0931, + "step": 634 + }, + { + "epoch": 3.22, + "learning_rate": 0.0002952905465155977, + "loss": 0.0934, + "step": 635 + }, + { + "epoch": 3.23, + "learning_rate": 0.0002938555806245406, + "loss": 0.089, + "step": 636 + }, + { + "epoch": 3.23, + "learning_rate": 0.00029242265745840063, + "loss": 0.0873, + "step": 637 + }, + { + "epoch": 3.24, + "learning_rate": 0.00029099179121629115, + "loss": 0.0719, + "step": 638 + }, + { + "epoch": 3.24, + "learning_rate": 0.0002895629960769417, + "loss": 0.1076, + "step": 639 + }, + { + "epoch": 3.25, + "learning_rate": 0.0002881362861985606, + "loss": 0.0738, + "step": 640 + }, + { + "epoch": 3.25, + "learning_rate": 0.0002867116757186911, + "loss": 0.0744, + "step": 641 + }, + { + "epoch": 3.26, + "learning_rate": 0.00028528917875407435, + "loss": 0.0468, + "step": 642 + }, + { + "epoch": 3.26, + "learning_rate": 0.0002838688094005078, + "loss": 0.0523, + "step": 643 + }, + { + "epoch": 3.27, + "learning_rate": 0.00028245058173270623, + "loss": 0.0498, + "step": 644 + }, + { + "epoch": 3.27, + "learning_rate": 0.00028103450980416134, + "loss": 0.0621, + "step": 645 + }, + { + "epoch": 3.28, + "learning_rate": 0.0002796206076470044, + "loss": 0.0792, + "step": 646 + }, + { + "epoch": 3.28, + "learning_rate": 0.0002782088892718648, + "loss": 0.0536, + "step": 647 + }, + { + "epoch": 3.29, + "learning_rate": 0.00027679936866773316, + "loss": 0.0689, + "step": 648 + }, + { + "epoch": 3.29, + "learning_rate": 0.0002753920598018217, + "loss": 0.0561, + "step": 649 + }, + { + "epoch": 3.3, + "learning_rate": 0.00027398697661942627, + "loss": 0.0681, + "step": 650 + }, + { + "epoch": 3.3, + "learning_rate": 0.00027258413304378734, + "loss": 0.0969, + "step": 651 + }, + { + "epoch": 3.31, + "learning_rate": 0.00027118354297595395, + "loss": 0.0473, + "step": 652 + }, + { + "epoch": 3.31, + "learning_rate": 0.00026978522029464324, + "loss": 0.0433, + "step": 653 + }, + { + "epoch": 3.32, + "learning_rate": 0.0002683891788561055, + "loss": 0.0564, + "step": 654 + }, + { + "epoch": 3.32, + "learning_rate": 0.0002669954324939852, + "loss": 0.1075, + "step": 655 + }, + { + "epoch": 3.33, + "learning_rate": 0.00026560399501918467, + "loss": 0.0576, + "step": 656 + }, + { + "epoch": 3.34, + "learning_rate": 0.0002642148802197267, + "loss": 0.0872, + "step": 657 + }, + { + "epoch": 3.34, + "learning_rate": 0.0002628281018606186, + "loss": 0.1002, + "step": 658 + }, + { + "epoch": 3.35, + "learning_rate": 0.00026144367368371535, + "loss": 0.0549, + "step": 659 + }, + { + "epoch": 3.35, + "learning_rate": 0.0002600616094075835, + "loss": 0.0571, + "step": 660 + }, + { + "epoch": 3.36, + "learning_rate": 0.00025868192272736514, + "loss": 0.0875, + "step": 661 + }, + { + "epoch": 3.36, + "learning_rate": 0.0002573046273146427, + "loss": 0.0623, + "step": 662 + }, + { + "epoch": 3.37, + "learning_rate": 0.0002559297368173024, + "loss": 0.1347, + "step": 663 + }, + { + "epoch": 3.37, + "learning_rate": 0.00025455726485940013, + "loss": 0.0528, + "step": 664 + }, + { + "epoch": 3.38, + "learning_rate": 0.00025318722504102606, + "loss": 0.0508, + "step": 665 + }, + { + "epoch": 3.38, + "learning_rate": 0.0002518196309381696, + "loss": 0.0697, + "step": 666 + }, + { + "epoch": 3.39, + "learning_rate": 0.0002504544961025853, + "loss": 0.0655, + "step": 667 + }, + { + "epoch": 3.39, + "learning_rate": 0.00024909183406165834, + "loss": 0.0566, + "step": 668 + }, + { + "epoch": 3.4, + "learning_rate": 0.0002477316583182702, + "loss": 0.0714, + "step": 669 + }, + { + "epoch": 3.4, + "learning_rate": 0.00024637398235066526, + "loss": 0.0747, + "step": 670 + }, + { + "epoch": 3.41, + "learning_rate": 0.0002450188196123177, + "loss": 0.0617, + "step": 671 + }, + { + "epoch": 3.41, + "learning_rate": 0.00024366618353179642, + "loss": 0.0488, + "step": 672 + }, + { + "epoch": 3.42, + "learning_rate": 0.0002423160875126348, + "loss": 0.0622, + "step": 673 + }, + { + "epoch": 3.42, + "learning_rate": 0.00024096854493319475, + "loss": 0.0697, + "step": 674 + }, + { + "epoch": 3.43, + "learning_rate": 0.00023962356914653655, + "loss": 0.0897, + "step": 675 + }, + { + "epoch": 3.43, + "learning_rate": 0.00023828117348028528, + "loss": 0.078, + "step": 676 + }, + { + "epoch": 3.44, + "learning_rate": 0.00023694137123649945, + "loss": 0.0958, + "step": 677 + }, + { + "epoch": 3.44, + "learning_rate": 0.00023560417569153797, + "loss": 0.1041, + "step": 678 + }, + { + "epoch": 3.45, + "learning_rate": 0.0002342696000959309, + "loss": 0.0466, + "step": 679 + }, + { + "epoch": 3.45, + "learning_rate": 0.00023293765767424534, + "loss": 0.0474, + "step": 680 + }, + { + "epoch": 3.46, + "learning_rate": 0.00023160836162495652, + "loss": 0.0809, + "step": 681 + }, + { + "epoch": 3.46, + "learning_rate": 0.00023028172512031605, + "loss": 0.0506, + "step": 682 + }, + { + "epoch": 3.47, + "learning_rate": 0.00022895776130622182, + "loss": 0.0766, + "step": 683 + }, + { + "epoch": 3.47, + "learning_rate": 0.0002276364833020868, + "loss": 0.0307, + "step": 684 + }, + { + "epoch": 3.48, + "learning_rate": 0.00022631790420071064, + "loss": 0.0586, + "step": 685 + }, + { + "epoch": 3.48, + "learning_rate": 0.00022500203706814854, + "loss": 0.0805, + "step": 686 + }, + { + "epoch": 3.49, + "learning_rate": 0.00022368889494358237, + "loss": 0.0766, + "step": 687 + }, + { + "epoch": 3.49, + "learning_rate": 0.00022237849083919144, + "loss": 0.0723, + "step": 688 + }, + { + "epoch": 3.5, + "learning_rate": 0.00022107083774002361, + "loss": 0.0643, + "step": 689 + }, + { + "epoch": 3.5, + "learning_rate": 0.00021976594860386594, + "loss": 0.0543, + "step": 690 + }, + { + "epoch": 3.51, + "learning_rate": 0.00021846383636111745, + "loss": 0.0654, + "step": 691 + }, + { + "epoch": 3.51, + "learning_rate": 0.0002171645139146601, + "loss": 0.066, + "step": 692 + }, + { + "epoch": 3.52, + "learning_rate": 0.00021586799413973135, + "loss": 0.0478, + "step": 693 + }, + { + "epoch": 3.52, + "learning_rate": 0.00021457428988379634, + "loss": 0.1587, + "step": 694 + }, + { + "epoch": 3.53, + "learning_rate": 0.00021328341396642091, + "loss": 0.0895, + "step": 695 + }, + { + "epoch": 3.53, + "learning_rate": 0.00021199537917914385, + "loss": 0.0559, + "step": 696 + }, + { + "epoch": 3.54, + "learning_rate": 0.0002107101982853511, + "loss": 0.0638, + "step": 697 + }, + { + "epoch": 3.54, + "learning_rate": 0.00020942788402014868, + "loss": 0.0335, + "step": 698 + }, + { + "epoch": 3.55, + "learning_rate": 0.0002081484490902366, + "loss": 0.2609, + "step": 699 + }, + { + "epoch": 3.55, + "learning_rate": 0.0002068719061737831, + "loss": 0.0565, + "step": 700 + }, + { + "epoch": 3.56, + "learning_rate": 0.00020559826792029885, + "loss": 0.0791, + "step": 701 + }, + { + "epoch": 3.56, + "learning_rate": 0.00020432754695051135, + "loss": 0.0252, + "step": 702 + }, + { + "epoch": 3.57, + "learning_rate": 0.0002030597558562406, + "loss": 0.0817, + "step": 703 + }, + { + "epoch": 3.57, + "learning_rate": 0.0002017949072002737, + "loss": 0.014, + "step": 704 + }, + { + "epoch": 3.58, + "learning_rate": 0.0002005330135162408, + "loss": 0.0377, + "step": 705 + }, + { + "epoch": 3.58, + "learning_rate": 0.00019927408730848988, + "loss": 0.0633, + "step": 706 + }, + { + "epoch": 3.59, + "learning_rate": 0.00019801814105196498, + "loss": 0.0667, + "step": 707 + }, + { + "epoch": 3.59, + "learning_rate": 0.00019676518719207977, + "loss": 0.0506, + "step": 708 + }, + { + "epoch": 3.6, + "learning_rate": 0.00019551523814459665, + "loss": 0.0661, + "step": 709 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001942683062955024, + "loss": 0.0808, + "step": 710 + }, + { + "epoch": 3.61, + "learning_rate": 0.00019302440400088605, + "loss": 0.0402, + "step": 711 + }, + { + "epoch": 3.61, + "learning_rate": 0.00019178354358681548, + "loss": 0.0523, + "step": 712 + }, + { + "epoch": 3.62, + "learning_rate": 0.00019054573734921716, + "loss": 0.0338, + "step": 713 + }, + { + "epoch": 3.62, + "learning_rate": 0.00018931099755375202, + "loss": 0.0693, + "step": 714 + }, + { + "epoch": 3.63, + "learning_rate": 0.00018807933643569559, + "loss": 0.0384, + "step": 715 + }, + { + "epoch": 3.63, + "learning_rate": 0.00018685076619981607, + "loss": 0.0587, + "step": 716 + }, + { + "epoch": 3.64, + "learning_rate": 0.0001856252990202537, + "loss": 0.0533, + "step": 717 + }, + { + "epoch": 3.64, + "learning_rate": 0.0001844029470403993, + "loss": 0.0493, + "step": 718 + }, + { + "epoch": 3.65, + "learning_rate": 0.00018318372237277565, + "loss": 0.0897, + "step": 719 + }, + { + "epoch": 3.65, + "learning_rate": 0.00018196763709891524, + "loss": 0.0726, + "step": 720 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001807547032692424, + "loss": 0.0668, + "step": 721 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001795449329029531, + "loss": 0.0753, + "step": 722 + }, + { + "epoch": 3.67, + "learning_rate": 0.00017833833798789594, + "loss": 0.071, + "step": 723 + }, + { + "epoch": 3.68, + "learning_rate": 0.00017713493048045293, + "loss": 0.0453, + "step": 724 + }, + { + "epoch": 3.68, + "learning_rate": 0.000175934722305422, + "loss": 0.0406, + "step": 725 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001747377253558982, + "loss": 0.0635, + "step": 726 + }, + { + "epoch": 3.69, + "learning_rate": 0.00017354395149315533, + "loss": 0.0737, + "step": 727 + }, + { + "epoch": 3.7, + "learning_rate": 0.00017235341254653004, + "loss": 0.0618, + "step": 728 + }, + { + "epoch": 3.7, + "learning_rate": 0.00017116612031330253, + "loss": 0.0542, + "step": 729 + }, + { + "epoch": 3.71, + "learning_rate": 0.00016998208655858137, + "loss": 0.081, + "step": 730 + }, + { + "epoch": 3.71, + "learning_rate": 0.00016880132301518597, + "loss": 0.0573, + "step": 731 + }, + { + "epoch": 3.72, + "learning_rate": 0.00016762384138353076, + "loss": 0.0335, + "step": 732 + }, + { + "epoch": 3.72, + "learning_rate": 0.00016644965333150846, + "loss": 0.0498, + "step": 733 + }, + { + "epoch": 3.73, + "learning_rate": 0.00016527877049437624, + "loss": 0.0684, + "step": 734 + }, + { + "epoch": 3.73, + "learning_rate": 0.00016411120447463806, + "loss": 0.0448, + "step": 735 + }, + { + "epoch": 3.74, + "learning_rate": 0.00016294696684193154, + "loss": 0.0779, + "step": 736 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001617860691329126, + "loss": 0.0652, + "step": 737 + }, + { + "epoch": 3.75, + "learning_rate": 0.00016062852285114122, + "loss": 0.0356, + "step": 738 + }, + { + "epoch": 3.75, + "learning_rate": 0.00015947433946696693, + "loss": 0.1102, + "step": 739 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001583235304174167, + "loss": 0.0407, + "step": 740 + }, + { + "epoch": 3.76, + "learning_rate": 0.00015717610710607949, + "loss": 0.0249, + "step": 741 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015603208090299498, + "loss": 0.053, + "step": 742 + }, + { + "epoch": 3.77, + "learning_rate": 0.00015489146314454, + "loss": 0.0357, + "step": 743 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001537542651333167, + "loss": 0.0729, + "step": 744 + }, + { + "epoch": 3.78, + "learning_rate": 0.00015262049813803956, + "loss": 0.0398, + "step": 745 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015149017339342576, + "loss": 0.0674, + "step": 746 + }, + { + "epoch": 3.79, + "learning_rate": 0.00015036330210008115, + "loss": 0.0381, + "step": 747 + }, + { + "epoch": 3.8, + "learning_rate": 0.0001492398954243916, + "loss": 0.0765, + "step": 748 + }, + { + "epoch": 3.8, + "learning_rate": 0.00014811996449841098, + "loss": 0.0245, + "step": 749 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001470035204197517, + "loss": 0.037, + "step": 750 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001458905742514734, + "loss": 0.0521, + "step": 751 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001447811370219757, + "loss": 0.0429, + "step": 752 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001436752197248861, + "loss": 0.0589, + "step": 753 + }, + { + "epoch": 3.83, + "learning_rate": 0.00014257283331895317, + "loss": 0.0922, + "step": 754 + }, + { + "epoch": 3.83, + "learning_rate": 0.00014147398872793693, + "loss": 0.0742, + "step": 755 + }, + { + "epoch": 3.84, + "learning_rate": 0.00014037869684050113, + "loss": 0.0461, + "step": 756 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001392869685101044, + "loss": 0.0312, + "step": 757 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013819881455489458, + "loss": 0.0173, + "step": 758 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013711424575759911, + "loss": 0.0386, + "step": 759 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013603327286542023, + "loss": 0.0292, + "step": 760 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013495590658992718, + "loss": 0.0973, + "step": 761 + }, + { + "epoch": 3.87, + "learning_rate": 0.000133882157606951, + "loss": 0.0646, + "step": 762 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013281203655647755, + "loss": 0.1611, + "step": 763 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001317455540425439, + "loss": 0.0415, + "step": 764 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013068272063313102, + "loss": 0.0521, + "step": 765 + }, + { + "epoch": 3.89, + "learning_rate": 0.00012962354686006083, + "loss": 0.035, + "step": 766 + }, + { + "epoch": 3.89, + "learning_rate": 0.00012856804321889116, + "loss": 0.0574, + "step": 767 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001275162201688118, + "loss": 0.0436, + "step": 768 + }, + { + "epoch": 3.9, + "learning_rate": 0.00012646808813254035, + "loss": 0.0405, + "step": 769 + }, + { + "epoch": 3.91, + "learning_rate": 0.00012542365749622047, + "loss": 0.0526, + "step": 770 + }, + { + "epoch": 3.91, + "learning_rate": 0.00012438293860931676, + "loss": 0.0275, + "step": 771 + }, + { + "epoch": 3.92, + "learning_rate": 0.00012334594178451424, + "loss": 0.0757, + "step": 772 + }, + { + "epoch": 3.92, + "learning_rate": 0.00012231267729761485, + "loss": 0.0317, + "step": 773 + }, + { + "epoch": 3.93, + "learning_rate": 0.00012128315538743645, + "loss": 0.0306, + "step": 774 + }, + { + "epoch": 3.93, + "learning_rate": 0.00012025738625571026, + "loss": 0.0453, + "step": 775 + }, + { + "epoch": 3.94, + "learning_rate": 0.00011923538006698154, + "loss": 0.044, + "step": 776 + }, + { + "epoch": 3.94, + "learning_rate": 0.00011821714694850688, + "loss": 0.0719, + "step": 777 + }, + { + "epoch": 3.95, + "learning_rate": 0.00011720269699015528, + "loss": 0.056, + "step": 778 + }, + { + "epoch": 3.95, + "learning_rate": 0.00011619204024430768, + "loss": 0.0287, + "step": 779 + }, + { + "epoch": 3.96, + "learning_rate": 0.00011518518672575701, + "loss": 0.0672, + "step": 780 + }, + { + "epoch": 3.96, + "learning_rate": 0.00011418214641160956, + "loss": 0.0477, + "step": 781 + }, + { + "epoch": 3.97, + "learning_rate": 0.00011318292924118584, + "loss": 0.0515, + "step": 782 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001121875451159221, + "loss": 0.0937, + "step": 783 + }, + { + "epoch": 3.98, + "learning_rate": 0.00011119600389927181, + "loss": 0.0534, + "step": 784 + }, + { + "epoch": 3.98, + "learning_rate": 0.00011020831541660914, + "loss": 0.0238, + "step": 785 + }, + { + "epoch": 3.99, + "learning_rate": 0.00010922448945512981, + "loss": 0.0686, + "step": 786 + }, + { + "epoch": 3.99, + "learning_rate": 0.00010824453576375576, + "loss": 0.0209, + "step": 787 + }, + { + "epoch": 4.0, + "learning_rate": 0.00010726846405303752, + "loss": 0.066, + "step": 788 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001062962839950587, + "loss": 0.0097, + "step": 789 + }, + { + "epoch": 4.01, + "learning_rate": 0.00010532800522333902, + "loss": 0.0172, + "step": 790 + }, + { + "epoch": 4.02, + "learning_rate": 0.00010436363733274057, + "loss": 0.0185, + "step": 791 + }, + { + "epoch": 4.02, + "learning_rate": 0.00010340318987937097, + "loss": 0.0164, + "step": 792 + }, + { + "epoch": 4.03, + "learning_rate": 0.00010244667238048988, + "loss": 0.0069, + "step": 793 + }, + { + "epoch": 4.03, + "learning_rate": 0.0001014940943144142, + "loss": 0.018, + "step": 794 + }, + { + "epoch": 4.04, + "learning_rate": 0.00010054546512042422, + "loss": 0.0065, + "step": 795 + }, + { + "epoch": 4.04, + "learning_rate": 9.960079419866985e-05, + "loss": 0.0177, + "step": 796 + }, + { + "epoch": 4.05, + "learning_rate": 9.866009091007833e-05, + "loss": 0.0105, + "step": 797 + }, + { + "epoch": 4.05, + "learning_rate": 9.772336457626013e-05, + "loss": 0.0127, + "step": 798 + }, + { + "epoch": 4.06, + "learning_rate": 9.679062447941778e-05, + "loss": 0.0111, + "step": 799 + }, + { + "epoch": 4.06, + "learning_rate": 9.586187986225326e-05, + "loss": 0.0114, + "step": 800 + }, + { + "epoch": 4.07, + "learning_rate": 9.493713992787673e-05, + "loss": 0.006, + "step": 801 + }, + { + "epoch": 4.07, + "learning_rate": 9.401641383971476e-05, + "loss": 0.0041, + "step": 802 + }, + { + "epoch": 4.08, + "learning_rate": 9.309971072142037e-05, + "loss": 0.0126, + "step": 803 + }, + { + "epoch": 4.08, + "learning_rate": 9.218703965678204e-05, + "loss": 0.0107, + "step": 804 + }, + { + "epoch": 4.09, + "learning_rate": 9.12784096896338e-05, + "loss": 0.0045, + "step": 805 + }, + { + "epoch": 4.09, + "learning_rate": 9.03738298237658e-05, + "loss": 0.0039, + "step": 806 + }, + { + "epoch": 4.1, + "learning_rate": 8.94733090228349e-05, + "loss": 0.0067, + "step": 807 + }, + { + "epoch": 4.1, + "learning_rate": 8.857685621027567e-05, + "loss": 0.0114, + "step": 808 + }, + { + "epoch": 4.11, + "learning_rate": 8.768448026921244e-05, + "loss": 0.0121, + "step": 809 + }, + { + "epoch": 4.11, + "learning_rate": 8.679619004237111e-05, + "loss": 0.0055, + "step": 810 + }, + { + "epoch": 4.12, + "learning_rate": 8.591199433199127e-05, + "loss": 0.0422, + "step": 811 + }, + { + "epoch": 4.12, + "learning_rate": 8.503190189973914e-05, + "loss": 0.0126, + "step": 812 + }, + { + "epoch": 4.13, + "learning_rate": 8.415592146662105e-05, + "loss": 0.0071, + "step": 813 + }, + { + "epoch": 4.13, + "learning_rate": 8.328406171289621e-05, + "loss": 0.0035, + "step": 814 + }, + { + "epoch": 4.14, + "learning_rate": 8.24163312779917e-05, + "loss": 0.0216, + "step": 815 + }, + { + "epoch": 4.14, + "learning_rate": 8.155273876041613e-05, + "loss": 0.0069, + "step": 816 + }, + { + "epoch": 4.15, + "learning_rate": 8.069329271767484e-05, + "loss": 0.0219, + "step": 817 + }, + { + "epoch": 4.15, + "learning_rate": 7.983800166618483e-05, + "loss": 0.0105, + "step": 818 + }, + { + "epoch": 4.16, + "learning_rate": 7.898687408119065e-05, + "loss": 0.0114, + "step": 819 + }, + { + "epoch": 4.16, + "learning_rate": 7.813991839667995e-05, + "loss": 0.0269, + "step": 820 + }, + { + "epoch": 4.17, + "learning_rate": 7.72971430053005e-05, + "loss": 0.0235, + "step": 821 + }, + { + "epoch": 4.17, + "learning_rate": 7.645855625827657e-05, + "loss": 0.0289, + "step": 822 + }, + { + "epoch": 4.18, + "learning_rate": 7.562416646532661e-05, + "loss": 0.0042, + "step": 823 + }, + { + "epoch": 4.18, + "learning_rate": 7.479398189458003e-05, + "loss": 0.0193, + "step": 824 + }, + { + "epoch": 4.19, + "learning_rate": 7.396801077249677e-05, + "loss": 0.024, + "step": 825 + }, + { + "epoch": 4.19, + "learning_rate": 7.31462612837841e-05, + "loss": 0.006, + "step": 826 + }, + { + "epoch": 4.2, + "learning_rate": 7.232874157131669e-05, + "loss": 0.0372, + "step": 827 + }, + { + "epoch": 4.2, + "learning_rate": 7.15154597360555e-05, + "loss": 0.0196, + "step": 828 + }, + { + "epoch": 4.21, + "learning_rate": 7.070642383696762e-05, + "loss": 0.0154, + "step": 829 + }, + { + "epoch": 4.21, + "learning_rate": 6.99016418909459e-05, + "loss": 0.0152, + "step": 830 + }, + { + "epoch": 4.22, + "learning_rate": 6.910112187273066e-05, + "loss": 0.0112, + "step": 831 + }, + { + "epoch": 4.22, + "learning_rate": 6.830487171482935e-05, + "loss": 0.0072, + "step": 832 + }, + { + "epoch": 4.23, + "learning_rate": 6.75128993074388e-05, + "loss": 0.0038, + "step": 833 + }, + { + "epoch": 4.23, + "learning_rate": 6.672521249836688e-05, + "loss": 0.0065, + "step": 834 + }, + { + "epoch": 4.24, + "learning_rate": 6.594181909295427e-05, + "loss": 0.0224, + "step": 835 + }, + { + "epoch": 4.24, + "learning_rate": 6.516272685399794e-05, + "loss": 0.0035, + "step": 836 + }, + { + "epoch": 4.25, + "learning_rate": 6.438794350167337e-05, + "loss": 0.0209, + "step": 837 + }, + { + "epoch": 4.25, + "learning_rate": 6.36174767134588e-05, + "loss": 0.0052, + "step": 838 + }, + { + "epoch": 4.26, + "learning_rate": 6.285133412405858e-05, + "loss": 0.013, + "step": 839 + }, + { + "epoch": 4.26, + "learning_rate": 6.208952332532785e-05, + "loss": 0.0097, + "step": 840 + }, + { + "epoch": 4.27, + "learning_rate": 6.133205186619695e-05, + "loss": 0.0178, + "step": 841 + }, + { + "epoch": 4.27, + "learning_rate": 6.057892725259717e-05, + "loss": 0.0089, + "step": 842 + }, + { + "epoch": 4.28, + "learning_rate": 5.983015694738597e-05, + "loss": 0.0112, + "step": 843 + }, + { + "epoch": 4.28, + "learning_rate": 5.908574837027309e-05, + "loss": 0.0057, + "step": 844 + }, + { + "epoch": 4.29, + "learning_rate": 5.8345708897747095e-05, + "loss": 0.003, + "step": 845 + }, + { + "epoch": 4.29, + "learning_rate": 5.761004586300234e-05, + "loss": 0.0048, + "step": 846 + }, + { + "epoch": 4.3, + "learning_rate": 5.687876655586582e-05, + "loss": 0.0136, + "step": 847 + }, + { + "epoch": 4.3, + "learning_rate": 5.615187822272583e-05, + "loss": 0.0277, + "step": 848 + }, + { + "epoch": 4.31, + "learning_rate": 5.5429388066459305e-05, + "loss": 0.0309, + "step": 849 + }, + { + "epoch": 4.31, + "learning_rate": 5.4711303246361146e-05, + "loss": 0.0192, + "step": 850 + }, + { + "epoch": 4.32, + "learning_rate": 5.3997630878072356e-05, + "loss": 0.0061, + "step": 851 + }, + { + "epoch": 4.32, + "learning_rate": 5.328837803351083e-05, + "loss": 0.0031, + "step": 852 + }, + { + "epoch": 4.33, + "learning_rate": 5.258355174079993e-05, + "loss": 0.0031, + "step": 853 + }, + { + "epoch": 4.34, + "learning_rate": 5.1883158984199704e-05, + "loss": 0.0063, + "step": 854 + }, + { + "epoch": 4.34, + "learning_rate": 5.118720670403748e-05, + "loss": 0.0113, + "step": 855 + }, + { + "epoch": 4.35, + "learning_rate": 5.0495701796639096e-05, + "loss": 0.0044, + "step": 856 + }, + { + "epoch": 4.35, + "learning_rate": 4.980865111426003e-05, + "loss": 0.0022, + "step": 857 + }, + { + "epoch": 4.36, + "learning_rate": 4.9126061465018856e-05, + "loss": 0.0045, + "step": 858 + }, + { + "epoch": 4.36, + "learning_rate": 4.844793961282812e-05, + "loss": 0.0099, + "step": 859 + }, + { + "epoch": 4.37, + "learning_rate": 4.777429227732843e-05, + "loss": 0.0075, + "step": 860 + }, + { + "epoch": 4.37, + "learning_rate": 4.710512613382151e-05, + "loss": 0.014, + "step": 861 + }, + { + "epoch": 4.38, + "learning_rate": 4.6440447813204225e-05, + "loss": 0.005, + "step": 862 + }, + { + "epoch": 4.38, + "learning_rate": 4.578026390190232e-05, + "loss": 0.0102, + "step": 863 + }, + { + "epoch": 4.39, + "learning_rate": 4.512458094180616e-05, + "loss": 0.0027, + "step": 864 + }, + { + "epoch": 4.39, + "learning_rate": 4.447340543020473e-05, + "loss": 0.0066, + "step": 865 + }, + { + "epoch": 4.4, + "learning_rate": 4.3826743819722236e-05, + "loss": 0.0089, + "step": 866 + }, + { + "epoch": 4.4, + "learning_rate": 4.318460251825357e-05, + "loss": 0.008, + "step": 867 + }, + { + "epoch": 4.41, + "learning_rate": 4.2546987888901266e-05, + "loss": 0.0097, + "step": 868 + }, + { + "epoch": 4.41, + "learning_rate": 4.191390624991159e-05, + "loss": 0.0037, + "step": 869 + }, + { + "epoch": 4.42, + "learning_rate": 4.12853638746134e-05, + "loss": 0.0416, + "step": 870 + }, + { + "epoch": 4.42, + "learning_rate": 4.066136699135436e-05, + "loss": 0.0102, + "step": 871 + }, + { + "epoch": 4.43, + "learning_rate": 4.004192178344029e-05, + "loss": 0.0159, + "step": 872 + }, + { + "epoch": 4.43, + "learning_rate": 3.942703438907358e-05, + "loss": 0.0045, + "step": 873 + }, + { + "epoch": 4.44, + "learning_rate": 3.881671090129246e-05, + "loss": 0.0112, + "step": 874 + }, + { + "epoch": 4.44, + "learning_rate": 3.821095736791008e-05, + "loss": 0.0061, + "step": 875 + }, + { + "epoch": 4.45, + "learning_rate": 3.7609779791455744e-05, + "loss": 0.0139, + "step": 876 + }, + { + "epoch": 4.45, + "learning_rate": 3.7013184129113974e-05, + "loss": 0.0227, + "step": 877 + }, + { + "epoch": 4.46, + "learning_rate": 3.642117629266678e-05, + "loss": 0.0184, + "step": 878 + }, + { + "epoch": 4.46, + "learning_rate": 3.58337621484342e-05, + "loss": 0.0063, + "step": 879 + }, + { + "epoch": 4.47, + "learning_rate": 3.5250947517216634e-05, + "loss": 0.0095, + "step": 880 + }, + { + "epoch": 4.47, + "learning_rate": 3.467273817423688e-05, + "loss": 0.0451, + "step": 881 + }, + { + "epoch": 4.48, + "learning_rate": 3.40991398490833e-05, + "loss": 0.0086, + "step": 882 + }, + { + "epoch": 4.48, + "learning_rate": 3.353015822565253e-05, + "loss": 0.0197, + "step": 883 + }, + { + "epoch": 4.49, + "learning_rate": 3.296579894209345e-05, + "loss": 0.0232, + "step": 884 + }, + { + "epoch": 4.49, + "learning_rate": 3.240606759075143e-05, + "loss": 0.004, + "step": 885 + }, + { + "epoch": 4.5, + "learning_rate": 3.185096971811274e-05, + "loss": 0.0071, + "step": 886 + }, + { + "epoch": 4.5, + "learning_rate": 3.1300510824749276e-05, + "loss": 0.0077, + "step": 887 + }, + { + "epoch": 4.51, + "learning_rate": 3.0754696365265064e-05, + "loss": 0.0156, + "step": 888 + }, + { + "epoch": 4.51, + "learning_rate": 3.0213531748240763e-05, + "loss": 0.0067, + "step": 889 + }, + { + "epoch": 4.52, + "learning_rate": 2.9677022336181414e-05, + "loss": 0.0304, + "step": 890 + }, + { + "epoch": 4.52, + "learning_rate": 2.9145173445462582e-05, + "loss": 0.006, + "step": 891 + }, + { + "epoch": 4.53, + "learning_rate": 2.8617990346277655e-05, + "loss": 0.0188, + "step": 892 + }, + { + "epoch": 4.53, + "learning_rate": 2.8095478262585904e-05, + "loss": 0.0145, + "step": 893 + }, + { + "epoch": 4.54, + "learning_rate": 2.7577642372060676e-05, + "loss": 0.0076, + "step": 894 + }, + { + "epoch": 4.54, + "learning_rate": 2.7064487806037985e-05, + "loss": 0.0043, + "step": 895 + }, + { + "epoch": 4.55, + "learning_rate": 2.6556019649465524e-05, + "loss": 0.0237, + "step": 896 + }, + { + "epoch": 4.55, + "learning_rate": 2.605224294085279e-05, + "loss": 0.0117, + "step": 897 + }, + { + "epoch": 4.56, + "learning_rate": 2.5553162672220465e-05, + "loss": 0.0166, + "step": 898 + }, + { + "epoch": 4.56, + "learning_rate": 2.5058783789051463e-05, + "loss": 0.0019, + "step": 899 + }, + { + "epoch": 4.57, + "learning_rate": 2.45691111902418e-05, + "loss": 0.0276, + "step": 900 + }, + { + "epoch": 4.57, + "learning_rate": 2.4084149728051953e-05, + "loss": 0.0064, + "step": 901 + }, + { + "epoch": 4.58, + "learning_rate": 2.3603904208058692e-05, + "loss": 0.0092, + "step": 902 + }, + { + "epoch": 4.58, + "learning_rate": 2.3128379389108e-05, + "loss": 0.0086, + "step": 903 + }, + { + "epoch": 4.59, + "learning_rate": 2.2657579983267064e-05, + "loss": 0.0065, + "step": 904 + }, + { + "epoch": 4.59, + "learning_rate": 2.219151065577829e-05, + "loss": 0.0017, + "step": 905 + }, + { + "epoch": 4.6, + "learning_rate": 2.1730176025012817e-05, + "loss": 0.0046, + "step": 906 + }, + { + "epoch": 4.6, + "learning_rate": 2.1273580662424797e-05, + "loss": 0.0067, + "step": 907 + }, + { + "epoch": 4.61, + "learning_rate": 2.082172909250568e-05, + "loss": 0.0064, + "step": 908 + }, + { + "epoch": 4.61, + "learning_rate": 2.0374625792740464e-05, + "loss": 0.0069, + "step": 909 + }, + { + "epoch": 4.62, + "learning_rate": 1.993227519356189e-05, + "loss": 0.0041, + "step": 910 + }, + { + "epoch": 4.62, + "learning_rate": 1.94946816783077e-05, + "loss": 0.0047, + "step": 911 + }, + { + "epoch": 4.63, + "learning_rate": 1.906184958317664e-05, + "loss": 0.0057, + "step": 912 + }, + { + "epoch": 4.63, + "learning_rate": 1.863378319718578e-05, + "loss": 0.0069, + "step": 913 + }, + { + "epoch": 4.64, + "learning_rate": 1.82104867621275e-05, + "loss": 0.0217, + "step": 914 + }, + { + "epoch": 4.64, + "learning_rate": 1.7791964472528233e-05, + "loss": 0.0219, + "step": 915 + }, + { + "epoch": 4.65, + "learning_rate": 1.737822047560611e-05, + "loss": 0.0068, + "step": 916 + }, + { + "epoch": 4.65, + "learning_rate": 1.696925887123052e-05, + "loss": 0.0136, + "step": 917 + }, + { + "epoch": 4.66, + "learning_rate": 1.656508371188109e-05, + "loss": 0.0102, + "step": 918 + }, + { + "epoch": 4.66, + "learning_rate": 1.616569900260767e-05, + "loss": 0.0051, + "step": 919 + }, + { + "epoch": 4.67, + "learning_rate": 1.577110870099041e-05, + "loss": 0.0154, + "step": 920 + }, + { + "epoch": 4.68, + "learning_rate": 1.538131671710108e-05, + "loss": 0.0713, + "step": 921 + }, + { + "epoch": 4.68, + "learning_rate": 1.4996326913463754e-05, + "loss": 0.0157, + "step": 922 + }, + { + "epoch": 4.69, + "learning_rate": 1.461614310501691e-05, + "loss": 0.019, + "step": 923 + }, + { + "epoch": 4.69, + "learning_rate": 1.4240769059075342e-05, + "loss": 0.0123, + "step": 924 + }, + { + "epoch": 4.7, + "learning_rate": 1.387020849529319e-05, + "loss": 0.0032, + "step": 925 + }, + { + "epoch": 4.7, + "learning_rate": 1.3504465085626639e-05, + "loss": 0.0044, + "step": 926 + }, + { + "epoch": 4.71, + "learning_rate": 1.3143542454297885e-05, + "loss": 0.0061, + "step": 927 + }, + { + "epoch": 4.71, + "learning_rate": 1.2787444177759067e-05, + "loss": 0.0145, + "step": 928 + }, + { + "epoch": 4.72, + "learning_rate": 1.243617378465689e-05, + "loss": 0.0038, + "step": 929 + }, + { + "epoch": 4.72, + "learning_rate": 1.208973475579761e-05, + "loss": 0.0049, + "step": 930 + }, + { + "epoch": 4.73, + "learning_rate": 1.1748130524112665e-05, + "loss": 0.0022, + "step": 931 + }, + { + "epoch": 4.73, + "learning_rate": 1.1411364474624264e-05, + "loss": 0.0133, + "step": 932 + }, + { + "epoch": 4.74, + "learning_rate": 1.1079439944412406e-05, + "loss": 0.0191, + "step": 933 + }, + { + "epoch": 4.74, + "learning_rate": 1.0752360222581469e-05, + "loss": 0.0045, + "step": 934 + }, + { + "epoch": 4.75, + "learning_rate": 1.0430128550227624e-05, + "loss": 0.008, + "step": 935 + }, + { + "epoch": 4.75, + "learning_rate": 1.0112748120406857e-05, + "loss": 0.0066, + "step": 936 + }, + { + "epoch": 4.76, + "learning_rate": 9.800222078103272e-06, + "loss": 0.0041, + "step": 937 + }, + { + "epoch": 4.76, + "learning_rate": 9.492553520197733e-06, + "loss": 0.0113, + "step": 938 + }, + { + "epoch": 4.77, + "learning_rate": 9.189745495437607e-06, + "loss": 0.0131, + "step": 939 + }, + { + "epoch": 4.77, + "learning_rate": 8.89180100440612e-06, + "loss": 0.0025, + "step": 940 + }, + { + "epoch": 4.78, + "learning_rate": 8.59872299949288e-06, + "loss": 0.0092, + "step": 941 + }, + { + "epoch": 4.78, + "learning_rate": 8.31051438486441e-06, + "loss": 0.0018, + "step": 942 + }, + { + "epoch": 4.79, + "learning_rate": 8.027178016435766e-06, + "loss": 0.0059, + "step": 943 + }, + { + "epoch": 4.79, + "learning_rate": 7.748716701841684e-06, + "loss": 0.018, + "step": 944 + }, + { + "epoch": 4.8, + "learning_rate": 7.475133200409212e-06, + "loss": 0.0161, + "step": 945 + }, + { + "epoch": 4.8, + "learning_rate": 7.206430223130278e-06, + "loss": 0.017, + "step": 946 + }, + { + "epoch": 4.81, + "learning_rate": 6.9426104326345e-06, + "loss": 0.003, + "step": 947 + }, + { + "epoch": 4.81, + "learning_rate": 6.683676443163312e-06, + "loss": 0.0087, + "step": 948 + }, + { + "epoch": 4.82, + "learning_rate": 6.429630820543597e-06, + "loss": 0.0113, + "step": 949 + }, + { + "epoch": 4.82, + "learning_rate": 6.180476082162656e-06, + "loss": 0.0156, + "step": 950 + }, + { + "epoch": 4.83, + "learning_rate": 5.936214696942887e-06, + "loss": 0.005, + "step": 951 + }, + { + "epoch": 4.83, + "learning_rate": 5.696849085317646e-06, + "loss": 0.0041, + "step": 952 + }, + { + "epoch": 4.84, + "learning_rate": 5.462381619207091e-06, + "loss": 0.0068, + "step": 953 + }, + { + "epoch": 4.84, + "learning_rate": 5.232814621994597e-06, + "loss": 0.0027, + "step": 954 + }, + { + "epoch": 4.85, + "learning_rate": 5.008150368503994e-06, + "loss": 0.0037, + "step": 955 + }, + { + "epoch": 4.85, + "learning_rate": 4.788391084976862e-06, + "loss": 0.0125, + "step": 956 + }, + { + "epoch": 4.86, + "learning_rate": 4.5735389490503265e-06, + "loss": 0.0126, + "step": 957 + }, + { + "epoch": 4.86, + "learning_rate": 4.36359608973591e-06, + "loss": 0.0088, + "step": 958 + }, + { + "epoch": 4.87, + "learning_rate": 4.158564587397828e-06, + "loss": 0.0032, + "step": 959 + }, + { + "epoch": 4.87, + "learning_rate": 3.9584464737330016e-06, + "loss": 0.0066, + "step": 960 + }, + { + "epoch": 4.88, + "learning_rate": 3.763243731750521e-06, + "loss": 0.0061, + "step": 961 + }, + { + "epoch": 4.88, + "learning_rate": 3.572958295752049e-06, + "loss": 0.0032, + "step": 962 + }, + { + "epoch": 4.89, + "learning_rate": 3.387592051312782e-06, + "loss": 0.0064, + "step": 963 + }, + { + "epoch": 4.89, + "learning_rate": 3.207146835262742e-06, + "loss": 0.0031, + "step": 964 + }, + { + "epoch": 4.9, + "learning_rate": 3.031624435668345e-06, + "loss": 0.0152, + "step": 965 + }, + { + "epoch": 4.9, + "learning_rate": 2.861026591815141e-06, + "loss": 0.0139, + "step": 966 + }, + { + "epoch": 4.91, + "learning_rate": 2.6953549941900467e-06, + "loss": 0.0061, + "step": 967 + }, + { + "epoch": 4.91, + "learning_rate": 2.5346112844650826e-06, + "loss": 0.0033, + "step": 968 + }, + { + "epoch": 4.92, + "learning_rate": 2.378797055480608e-06, + "loss": 0.0026, + "step": 969 + }, + { + "epoch": 4.92, + "learning_rate": 2.227913851230057e-06, + "loss": 0.0067, + "step": 970 + }, + { + "epoch": 4.93, + "learning_rate": 2.081963166844225e-06, + "loss": 0.0045, + "step": 971 + }, + { + "epoch": 4.93, + "learning_rate": 1.9409464485766747e-06, + "loss": 0.0126, + "step": 972 + }, + { + "epoch": 4.94, + "learning_rate": 1.804865093789354e-06, + "loss": 0.0036, + "step": 973 + }, + { + "epoch": 4.94, + "learning_rate": 1.6737204509387205e-06, + "loss": 0.0019, + "step": 974 + }, + { + "epoch": 4.95, + "learning_rate": 1.5475138195623628e-06, + "loss": 0.0085, + "step": 975 + }, + { + "epoch": 4.95, + "learning_rate": 1.4262464502663442e-06, + "loss": 0.0063, + "step": 976 + }, + { + "epoch": 4.96, + "learning_rate": 1.309919544712268e-06, + "loss": 0.0052, + "step": 977 + }, + { + "epoch": 4.96, + "learning_rate": 1.1985342556060653e-06, + "loss": 0.0166, + "step": 978 + }, + { + "epoch": 4.97, + "learning_rate": 1.092091686686114e-06, + "loss": 0.0158, + "step": 979 + }, + { + "epoch": 4.97, + "learning_rate": 9.905928927123608e-07, + "loss": 0.0166, + "step": 980 + }, + { + "epoch": 4.98, + "learning_rate": 8.94038879455994e-07, + "loss": 0.0074, + "step": 981 + }, + { + "epoch": 4.98, + "learning_rate": 8.024306036893969e-07, + "loss": 0.0084, + "step": 982 + }, + { + "epoch": 4.99, + "learning_rate": 7.157689731767669e-07, + "loss": 0.0071, + "step": 983 + }, + { + "epoch": 4.99, + "learning_rate": 6.340548466648444e-07, + "loss": 0.0031, + "step": 984 + }, + { + "epoch": 5.0, + "learning_rate": 5.572890338748082e-07, + "loss": 0.0131, + "step": 985 + }, + { + "epoch": 5.01, + "learning_rate": 4.854722954938385e-07, + "loss": 0.0016, + "step": 986 + }, + { + "epoch": 5.01, + "learning_rate": 4.1860534316801037e-07, + "loss": 0.0054, + "step": 987 + }, + { + "epoch": 5.02, + "learning_rate": 3.566888394948009e-07, + "loss": 0.0064, + "step": 988 + }, + { + "epoch": 5.02, + "learning_rate": 2.997233980168157e-07, + "loss": 0.0024, + "step": 989 + }, + { + "epoch": 5.03, + "learning_rate": 2.477095832156828e-07, + "loss": 0.004, + "step": 990 + }, + { + "epoch": 5.03, + "learning_rate": 2.0064791050633523e-07, + "loss": 0.0041, + "step": 991 + }, + { + "epoch": 5.04, + "learning_rate": 1.5853884623195924e-07, + "loss": 0.0206, + "step": 992 + }, + { + "epoch": 5.04, + "learning_rate": 1.2138280765944253e-07, + "loss": 0.002, + "step": 993 + }, + { + "epoch": 5.05, + "learning_rate": 8.918016297515542e-08, + "loss": 0.005, + "step": 994 + }, + { + "epoch": 5.05, + "learning_rate": 6.193123128134249e-08, + "loss": 0.0032, + "step": 995 + }, + { + "epoch": 5.06, + "learning_rate": 3.963628259290308e-08, + "loss": 0.0051, + "step": 996 + }, + { + "epoch": 5.06, + "learning_rate": 2.229553783478222e-08, + "loss": 0.0031, + "step": 997 + }, + { + "epoch": 5.07, + "learning_rate": 9.90916883986115e-09, + "loss": 0.0042, + "step": 998 + }, + { + "epoch": 5.07, + "learning_rate": 2.4772983469589784e-09, + "loss": 0.0022, + "step": 999 + }, + { + "epoch": 5.08, + "learning_rate": 0.0, + "loss": 0.0035, + "step": 1000 + }, + { + "epoch": 5.08, + "step": 1000, + "total_flos": 4.507227485768909e+16, + "train_loss": 0.2918202262979466, + "train_runtime": 2044.5236, + "train_samples_per_second": 1.956, + "train_steps_per_second": 0.489 + } + ], + "max_steps": 1000, + "num_train_epochs": 6, + "total_flos": 4.507227485768909e+16, + "trial_name": null, + "trial_params": null +}