| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9965075669383, | |
| "global_step": 1287, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.905, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.9999880457421163e-05, | |
| "loss": 0.6497, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.9999521832542736e-05, | |
| "loss": 0.6121, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.9998924133938902e-05, | |
| "loss": 0.7236, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.9998087375899756e-05, | |
| "loss": 0.7515, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9997011578430938e-05, | |
| "loss": 0.7073, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.9995696767253165e-05, | |
| "loss": 0.6146, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9994142973801627e-05, | |
| "loss": 0.5923, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9992350235225215e-05, | |
| "loss": 0.5629, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.999031859438565e-05, | |
| "loss": 0.5383, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9988048099856443e-05, | |
| "loss": 0.516, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.9985538805921757e-05, | |
| "loss": 0.5035, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.998279077257508e-05, | |
| "loss": 0.5244, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9979804065517808e-05, | |
| "loss": 0.486, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9976578756157684e-05, | |
| "loss": 0.4945, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.9973114921607055e-05, | |
| "loss": 0.4966, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9969412644681077e-05, | |
| "loss": 0.4935, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9965472013895685e-05, | |
| "loss": 0.4739, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.996129312346552e-05, | |
| "loss": 0.4913, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.9956876073301645e-05, | |
| "loss": 0.4641, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9952220969009175e-05, | |
| "loss": 0.4691, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9947327921884746e-05, | |
| "loss": 0.4666, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.994219704891385e-05, | |
| "loss": 0.4501, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9936828472768043e-05, | |
| "loss": 0.4558, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9931222321802016e-05, | |
| "loss": 0.4712, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9925378730050518e-05, | |
| "loss": 0.4661, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9919297837225152e-05, | |
| "loss": 0.4735, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.9912979788711042e-05, | |
| "loss": 0.4526, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.990642473556335e-05, | |
| "loss": 0.4453, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9899632834503662e-05, | |
| "loss": 0.4713, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.989260424791626e-05, | |
| "loss": 0.4622, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.9885339143844217e-05, | |
| "loss": 0.4585, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.987783769598538e-05, | |
| "loss": 0.4576, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.9870100083688242e-05, | |
| "loss": 0.4353, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.9862126491947624e-05, | |
| "loss": 0.4509, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.985391711140027e-05, | |
| "loss": 0.4402, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.9845472138320282e-05, | |
| "loss": 0.437, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.9836791774614437e-05, | |
| "loss": 0.4613, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 1.982787622781735e-05, | |
| "loss": 0.4567, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.9818725711086506e-05, | |
| "loss": 0.4541, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.980934044319718e-05, | |
| "loss": 0.4398, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.9799720648537197e-05, | |
| "loss": 0.4283, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.978986655710157e-05, | |
| "loss": 0.4443, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.9779778404487e-05, | |
| "loss": 0.4457, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.9769456431886244e-05, | |
| "loss": 0.4326, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.9758900886082343e-05, | |
| "loss": 0.4557, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.9748112019442734e-05, | |
| "loss": 0.4402, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.9737090089913205e-05, | |
| "loss": 0.465, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.9725835361011726e-05, | |
| "loss": 0.4387, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.971434810182217e-05, | |
| "loss": 0.4479, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.9702628586987846e-05, | |
| "loss": 0.4344, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.9690677096704964e-05, | |
| "loss": 0.4302, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.9678493916715914e-05, | |
| "loss": 0.4331, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.966607933830245e-05, | |
| "loss": 0.4224, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.9653433658278717e-05, | |
| "loss": 0.4225, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.9640557178984152e-05, | |
| "loss": 0.4177, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.9627450208276265e-05, | |
| "loss": 0.4546, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.9614113059523273e-05, | |
| "loss": 0.4257, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.9600546051596604e-05, | |
| "loss": 0.4453, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.9586749508863284e-05, | |
| "loss": 0.458, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.9572723761178168e-05, | |
| "loss": 0.4287, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.955846914387607e-05, | |
| "loss": 0.4581, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.954398599776373e-05, | |
| "loss": 0.4343, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.952927466911168e-05, | |
| "loss": 0.4431, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.9514335509645948e-05, | |
| "loss": 0.4332, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.9499168876539666e-05, | |
| "loss": 0.4315, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.9483775132404517e-05, | |
| "loss": 0.4403, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.946815464528208e-05, | |
| "loss": 0.4618, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.9452307788635015e-05, | |
| "loss": 0.4292, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.9436234941338145e-05, | |
| "loss": 0.4333, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.9419936487669396e-05, | |
| "loss": 0.4557, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.94034128173006e-05, | |
| "loss": 0.4575, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.938666432528819e-05, | |
| "loss": 0.4012, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.9369691412063755e-05, | |
| "loss": 0.4579, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.9352494483424456e-05, | |
| "loss": 0.4337, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.9335073950523335e-05, | |
| "loss": 0.4142, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.9317430229859474e-05, | |
| "loss": 0.4545, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.929956374326805e-05, | |
| "loss": 0.4679, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.928147491791024e-05, | |
| "loss": 0.4178, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.9263164186263003e-05, | |
| "loss": 0.4474, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.9244631986108768e-05, | |
| "loss": 0.4237, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.922587876052492e-05, | |
| "loss": 0.4456, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.920690495787326e-05, | |
| "loss": 0.412, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.918771103178924e-05, | |
| "loss": 0.4279, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.916829744117115e-05, | |
| "loss": 0.413, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.9148664650169128e-05, | |
| "loss": 0.4508, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.9128813128174063e-05, | |
| "loss": 0.4054, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.9108743349806382e-05, | |
| "loss": 0.4021, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.90884557949047e-05, | |
| "loss": 0.4392, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.9067950948514343e-05, | |
| "loss": 0.4414, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.904722930087575e-05, | |
| "loss": 0.4327, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.9026291347412765e-05, | |
| "loss": 0.4081, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.900513758872078e-05, | |
| "loss": 0.4432, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.8983768530554765e-05, | |
| "loss": 0.4355, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.8962184683817182e-05, | |
| "loss": 0.4292, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.8940386564545773e-05, | |
| "loss": 0.4182, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.891837469390122e-05, | |
| "loss": 0.4402, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.8896149598154675e-05, | |
| "loss": 0.4377, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.887371180867519e-05, | |
| "loss": 0.4236, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.8851061861917013e-05, | |
| "loss": 0.4399, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.8828200299406747e-05, | |
| "loss": 0.4285, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.8805127667730426e-05, | |
| "loss": 0.4465, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.878184451852042e-05, | |
| "loss": 0.4264, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.8758351408442278e-05, | |
| "loss": 0.4196, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.8734648899181388e-05, | |
| "loss": 0.4104, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.871073755742957e-05, | |
| "loss": 0.4188, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.868661795487151e-05, | |
| "loss": 0.4418, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.8662290668171107e-05, | |
| "loss": 0.4183, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.8637756278957683e-05, | |
| "loss": 0.4076, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.8613015373812066e-05, | |
| "loss": 0.4105, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 1.8588068544252572e-05, | |
| "loss": 0.4478, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 1.8562916386720883e-05, | |
| "loss": 0.4312, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.853755950256774e-05, | |
| "loss": 0.4044, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.8511998498038615e-05, | |
| "loss": 0.4069, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.8486233984259186e-05, | |
| "loss": 0.4349, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.8460266577220733e-05, | |
| "loss": 0.4039, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.8434096897765422e-05, | |
| "loss": 0.4153, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.8407725571571448e-05, | |
| "loss": 0.4188, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 1.838115322913807e-05, | |
| "loss": 0.4409, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.835438050577057e-05, | |
| "loss": 0.4109, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 1.8327408041565013e-05, | |
| "loss": 0.4247, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.8300236481392995e-05, | |
| "loss": 0.4451, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.8272866474886185e-05, | |
| "loss": 0.4127, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.8245298676420814e-05, | |
| "loss": 0.4346, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.8217533745102032e-05, | |
| "loss": 0.4078, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.818957234474813e-05, | |
| "loss": 0.4034, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.81614151438747e-05, | |
| "loss": 0.4355, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.8133062815678614e-05, | |
| "loss": 0.446, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.810451603802196e-05, | |
| "loss": 0.4329, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.807577549341582e-05, | |
| "loss": 0.4387, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.8046841869003962e-05, | |
| "loss": 0.4001, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.8017715856546397e-05, | |
| "loss": 0.4109, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.7988398152402857e-05, | |
| "loss": 0.4156, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.7958889457516134e-05, | |
| "loss": 0.4121, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.7929190477395318e-05, | |
| "loss": 0.4187, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.7899301922098958e-05, | |
| "loss": 0.4072, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7869224506218034e-05, | |
| "loss": 0.4556, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7838958948858923e-05, | |
| "loss": 0.4135, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7808505973626183e-05, | |
| "loss": 0.4384, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.777786630860525e-05, | |
| "loss": 0.4226, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.774704068634504e-05, | |
| "loss": 0.4362, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.771602984384043e-05, | |
| "loss": 0.4243, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.7684834522514632e-05, | |
| "loss": 0.4622, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.7653455468201483e-05, | |
| "loss": 0.448, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7621893431127596e-05, | |
| "loss": 0.4385, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.759014916589443e-05, | |
| "loss": 0.4149, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7558223431460254e-05, | |
| "loss": 0.4229, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.7526116991121988e-05, | |
| "loss": 0.4115, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.7493830612496975e-05, | |
| "loss": 0.4204, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.7461365067504602e-05, | |
| "loss": 0.4171, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.7428721132347863e-05, | |
| "loss": 0.4161, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.73958995874948e-05, | |
| "loss": 0.4214, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.7362901217659833e-05, | |
| "loss": 0.4175, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.7329726811785012e-05, | |
| "loss": 0.4105, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.7296377163021133e-05, | |
| "loss": 0.4354, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.7262853068708807e-05, | |
| "loss": 0.4113, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.7229155330359368e-05, | |
| "loss": 0.452, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.719528475363573e-05, | |
| "loss": 0.4154, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.7161242148333107e-05, | |
| "loss": 0.4236, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.712702832835966e-05, | |
| "loss": 0.4146, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.7092644111717052e-05, | |
| "loss": 0.4183, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.7058090320480866e-05, | |
| "loss": 0.4038, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.702336778078096e-05, | |
| "loss": 0.4135, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.698847732278173e-05, | |
| "loss": 0.408, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.6953419780662232e-05, | |
| "loss": 0.4003, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.6918195992596274e-05, | |
| "loss": 0.4065, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.6882806800732338e-05, | |
| "loss": 0.4205, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.6847253051173487e-05, | |
| "loss": 0.4135, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.6811535593957093e-05, | |
| "loss": 0.3965, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.6775655283034548e-05, | |
| "loss": 0.4028, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.6739612976250836e-05, | |
| "loss": 0.4578, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.670340953532401e-05, | |
| "loss": 0.4298, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.6667045825824616e-05, | |
| "loss": 0.4221, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.663052271715497e-05, | |
| "loss": 0.4062, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.6593841082528394e-05, | |
| "loss": 0.3934, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6557001798948324e-05, | |
| "loss": 0.4279, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6520005747187358e-05, | |
| "loss": 0.3993, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.648285381176618e-05, | |
| "loss": 0.4191, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6445546880932425e-05, | |
| "loss": 0.4198, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.6408085846639435e-05, | |
| "loss": 0.4, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.637047160452494e-05, | |
| "loss": 0.4347, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.6332705053889643e-05, | |
| "loss": 0.4188, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.6294787097675712e-05, | |
| "loss": 0.4052, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.6256718642445202e-05, | |
| "loss": 0.4214, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.6218500598358376e-05, | |
| "loss": 0.4283, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.6180133879151943e-05, | |
| "loss": 0.4188, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.6141619402117213e-05, | |
| "loss": 0.3989, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.6102958088078172e-05, | |
| "loss": 0.4126, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.606415086136945e-05, | |
| "loss": 0.4148, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.6025198649814243e-05, | |
| "loss": 0.42, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.5986102384702112e-05, | |
| "loss": 0.4398, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.594686300076673e-05, | |
| "loss": 0.3987, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.590748143616353e-05, | |
| "loss": 0.4313, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.5867958632447263e-05, | |
| "loss": 0.4214, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.582829553454951e-05, | |
| "loss": 0.4066, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.5788493090756074e-05, | |
| "loss": 0.4064, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.5748552252684303e-05, | |
| "loss": 0.4109, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.5708473975260356e-05, | |
| "loss": 0.4282, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.5668259216696366e-05, | |
| "loss": 0.4358, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.5627908938467516e-05, | |
| "loss": 0.4303, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.558742410528907e-05, | |
| "loss": 0.4082, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.5546805685093308e-05, | |
| "loss": 0.4041, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.550605464900636e-05, | |
| "loss": 0.4148, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.546517197132502e-05, | |
| "loss": 0.386, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.542415862949343e-05, | |
| "loss": 0.4227, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.5383015604079723e-05, | |
| "loss": 0.4174, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.5341743878752563e-05, | |
| "loss": 0.4302, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.5300344440257657e-05, | |
| "loss": 0.4076, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.5258818278394125e-05, | |
| "loss": 0.4047, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.5217166385990865e-05, | |
| "loss": 0.4242, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.5175389758882803e-05, | |
| "loss": 0.4032, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.5133489395887089e-05, | |
| "loss": 0.4268, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.509146629877921e-05, | |
| "loss": 0.4132, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.5049321472269043e-05, | |
| "loss": 0.4031, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.5007055923976843e-05, | |
| "loss": 0.3714, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 1.4964670664409136e-05, | |
| "loss": 0.235, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 1.4922166706934566e-05, | |
| "loss": 0.2015, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.4879545067759673e-05, | |
| "loss": 0.2057, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.4836806765904587e-05, | |
| "loss": 0.1876, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.4793952823178676e-05, | |
| "loss": 0.1879, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 1.4750984264156103e-05, | |
| "loss": 0.1897, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 1.4707902116151338e-05, | |
| "loss": 0.2166, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.4664707409194598e-05, | |
| "loss": 0.1852, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.462140117600721e-05, | |
| "loss": 0.1909, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 1.457798445197694e-05, | |
| "loss": 0.1845, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 1.4534458275133214e-05, | |
| "loss": 0.1772, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.449082368612232e-05, | |
| "loss": 0.1873, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.4447081728182518e-05, | |
| "loss": 0.1983, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.4403233447119096e-05, | |
| "loss": 0.192, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.4359279891279376e-05, | |
| "loss": 0.1808, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 1.431522211152764e-05, | |
| "loss": 0.1893, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 1.4271061161220007e-05, | |
| "loss": 0.186, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 1.4226798096179262e-05, | |
| "loss": 0.1854, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.4182433974669584e-05, | |
| "loss": 0.1736, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.4137969857371277e-05, | |
| "loss": 0.1876, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 1.4093406807355389e-05, | |
| "loss": 0.1904, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 1.4048745890058304e-05, | |
| "loss": 0.1829, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.4003988173256267e-05, | |
| "loss": 0.1835, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.3959134727039854e-05, | |
| "loss": 0.1829, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 1.3914186623788398e-05, | |
| "loss": 0.1907, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 1.3869144938144325e-05, | |
| "loss": 0.1842, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.3824010746987495e-05, | |
| "loss": 0.1929, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.3778785129409424e-05, | |
| "loss": 0.1824, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 1.3733469166687505e-05, | |
| "loss": 0.1867, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 1.3688063942259141e-05, | |
| "loss": 0.1842, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 1.3642570541695867e-05, | |
| "loss": 0.1874, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 1.359699005267736e-05, | |
| "loss": 0.1985, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 1.3551323564965465e-05, | |
| "loss": 0.1671, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.3505572170378118e-05, | |
| "loss": 0.1861, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.3459736962763263e-05, | |
| "loss": 0.1873, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.3413819037972682e-05, | |
| "loss": 0.1946, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.33678194938358e-05, | |
| "loss": 0.1769, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.332173943013345e-05, | |
| "loss": 0.205, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.327557994857156e-05, | |
| "loss": 0.1899, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 1.322934215275482e-05, | |
| "loss": 0.1746, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 1.3183027148160304e-05, | |
| "loss": 0.1843, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.3136636042111025e-05, | |
| "loss": 0.165, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.3090169943749475e-05, | |
| "loss": 0.189, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.3043629964011104e-05, | |
| "loss": 0.1745, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.2997017215597743e-05, | |
| "loss": 0.1829, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.295033281295103e-05, | |
| "loss": 0.1871, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.2903577872225737e-05, | |
| "loss": 0.1786, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.2856753511263105e-05, | |
| "loss": 0.1759, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.2809860849564103e-05, | |
| "loss": 0.2027, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.2762901008262678e-05, | |
| "loss": 0.1824, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.271587511009893e-05, | |
| "loss": 0.1805, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.2668784279392287e-05, | |
| "loss": 0.1777, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.2621629642014623e-05, | |
| "loss": 0.1873, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.2574412325363326e-05, | |
| "loss": 0.184, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.2527133458334353e-05, | |
| "loss": 0.1932, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.2479794171295248e-05, | |
| "loss": 0.1875, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.2432395596058097e-05, | |
| "loss": 0.1853, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.2384938865852482e-05, | |
| "loss": 0.1815, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.2337425115298389e-05, | |
| "loss": 0.1845, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.2289855480379074e-05, | |
| "loss": 0.1953, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.22422310984139e-05, | |
| "loss": 0.1886, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.2194553108031153e-05, | |
| "loss": 0.1875, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.214682264914082e-05, | |
| "loss": 0.1829, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.2099040862907332e-05, | |
| "loss": 0.1935, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.2051208891722274e-05, | |
| "loss": 0.1851, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.2003327879177085e-05, | |
| "loss": 0.1991, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.195539897003571e-05, | |
| "loss": 0.1927, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.190742331020723e-05, | |
| "loss": 0.183, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.185940204671846e-05, | |
| "loss": 0.201, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.1811336327686537e-05, | |
| "loss": 0.198, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.1763227302291464e-05, | |
| "loss": 0.185, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.1715076120748631e-05, | |
| "loss": 0.179, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.1666883934281324e-05, | |
| "loss": 0.1934, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.1618651895093192e-05, | |
| "loss": 0.1996, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.1570381156340701e-05, | |
| "loss": 0.1813, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.1522072872105576e-05, | |
| "loss": 0.1874, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.147372819736719e-05, | |
| "loss": 0.1773, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.1425348287974956e-05, | |
| "loss": 0.1912, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.1376934300620706e-05, | |
| "loss": 0.1949, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.1328487392811019e-05, | |
| "loss": 0.1883, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.1280008722839552e-05, | |
| "loss": 0.1766, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.1231499449759355e-05, | |
| "loss": 0.1987, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.1182960733355142e-05, | |
| "loss": 0.1785, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.1134393734115587e-05, | |
| "loss": 0.1961, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.1085799613205552e-05, | |
| "loss": 0.1805, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.1037179532438345e-05, | |
| "loss": 0.1745, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.098853465424793e-05, | |
| "loss": 0.1904, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.0939866141661148e-05, | |
| "loss": 0.1858, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.08911751582699e-05, | |
| "loss": 0.2095, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.0842462868203329e-05, | |
| "loss": 0.1935, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.079373043609999e-05, | |
| "loss": 0.1807, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.0744979027080003e-05, | |
| "loss": 0.194, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.06962098067172e-05, | |
| "loss": 0.196, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.0647423941011255e-05, | |
| "loss": 0.1916, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.0598622596359808e-05, | |
| "loss": 0.1904, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.054980693953058e-05, | |
| "loss": 0.1766, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.0500978137633469e-05, | |
| "loss": 0.1946, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.0452137358092654e-05, | |
| "loss": 0.1918, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.0403285768618682e-05, | |
| "loss": 0.1813, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.0354424537180554e-05, | |
| "loss": 0.1879, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.0305554831977788e-05, | |
| "loss": 0.1857, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.0256677821412508e-05, | |
| "loss": 0.1949, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.0207794674061483e-05, | |
| "loss": 0.209, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.015890655864822e-05, | |
| "loss": 0.2652, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.0110014644014994e-05, | |
| "loss": 0.263, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.0061120099094917e-05, | |
| "loss": 0.2231, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.0012224092883986e-05, | |
| "loss": 0.2141, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 9.963327794413137e-06, | |
| "loss": 0.2057, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 9.914432372720294e-06, | |
| "loss": 0.2352, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 9.865538996822418e-06, | |
| "loss": 0.2138, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 9.816648835687557e-06, | |
| "loss": 0.2054, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 9.767763058206897e-06, | |
| "loss": 0.2073, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 9.718882833166823e-06, | |
| "loss": 0.2001, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 9.670009329220963e-06, | |
| "loss": 0.1985, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 9.62114371486226e-06, | |
| "loss": 0.2006, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 9.572287158395025e-06, | |
| "loss": 0.2005, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 9.523440827907006e-06, | |
| "loss": 0.1974, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 9.474605891241465e-06, | |
| "loss": 0.207, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 9.425783515969258e-06, | |
| "loss": 0.1863, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 9.376974869360918e-06, | |
| "loss": 0.2004, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 9.328181118358734e-06, | |
| "loss": 0.1884, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 9.279403429548877e-06, | |
| "loss": 0.1884, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 9.230642969133483e-06, | |
| "loss": 0.1939, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 9.181900902902794e-06, | |
| "loss": 0.2122, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 9.13317839620727e-06, | |
| "loss": 0.197, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 9.084476613929726e-06, | |
| "loss": 0.1765, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 9.035796720457495e-06, | |
| "loss": 0.1879, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 8.987139879654575e-06, | |
| "loss": 0.189, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 8.938507254833811e-06, | |
| "loss": 0.1925, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 8.889900008729084e-06, | |
| "loss": 0.197, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 8.841319303467502e-06, | |
| "loss": 0.1954, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 8.792766300541622e-06, | |
| "loss": 0.1815, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 8.744242160781682e-06, | |
| "loss": 0.1914, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 8.69574804432784e-06, | |
| "loss": 0.186, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 8.647285110602443e-06, | |
| "loss": 0.1937, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 8.59885451828231e-06, | |
| "loss": 0.198, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 8.550457425271022e-06, | |
| "loss": 0.1819, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 8.502094988671232e-06, | |
| "loss": 0.2001, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 8.453768364757027e-06, | |
| "loss": 0.1704, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 8.405478708946254e-06, | |
| "loss": 0.1873, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 8.35722717577291e-06, | |
| "loss": 0.1771, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 8.309014918859538e-06, | |
| "loss": 0.1843, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 8.26084309088964e-06, | |
| "loss": 0.1808, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 8.212712843580124e-06, | |
| "loss": 0.2045, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 8.164625327653772e-06, | |
| "loss": 0.1799, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 8.116581692811711e-06, | |
| "loss": 0.1838, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 8.068583087705946e-06, | |
| "loss": 0.1923, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 8.020630659911881e-06, | |
| "loss": 0.1827, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 7.972725555900895e-06, | |
| "loss": 0.1819, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 7.924868921012918e-06, | |
| "loss": 0.1824, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 7.877061899429067e-06, | |
| "loss": 0.1973, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 7.829305634144264e-06, | |
| "loss": 0.183, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 7.781601266939936e-06, | |
| "loss": 0.1652, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.733949938356695e-06, | |
| "loss": 0.1895, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.686352787667083e-06, | |
| "loss": 0.1845, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 7.638810952848328e-06, | |
| "loss": 0.1894, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 7.591325570555136e-06, | |
| "loss": 0.1707, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 7.543897776092519e-06, | |
| "loss": 0.1776, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 7.496528703388648e-06, | |
| "loss": 0.1788, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 7.449219484967749e-06, | |
| "loss": 0.1777, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 7.401971251923015e-06, | |
| "loss": 0.183, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 7.354785133889566e-06, | |
| "loss": 0.1857, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 7.307662259017454e-06, | |
| "loss": 0.1892, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 7.260603753944674e-06, | |
| "loss": 0.1785, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 7.213610743770234e-06, | |
| "loss": 0.1884, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 7.166684352027265e-06, | |
| "loss": 0.1773, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 7.119825700656138e-06, | |
| "loss": 0.1862, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 7.073035909977661e-06, | |
| "loss": 0.1872, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 7.026316098666282e-06, | |
| "loss": 0.1917, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 6.979667383723345e-06, | |
| "loss": 0.1823, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 6.9330908804503874e-06, | |
| "loss": 0.179, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 6.886587702422474e-06, | |
| "loss": 0.1731, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 6.840158961461567e-06, | |
| "loss": 0.1843, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 6.793805767609953e-06, | |
| "loss": 0.1789, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 6.7475292291037e-06, | |
| "loss": 0.1851, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 6.701330452346156e-06, | |
| "loss": 0.1795, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 6.655210541881502e-06, | |
| "loss": 0.1907, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 6.609170600368346e-06, | |
| "loss": 0.1885, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 6.56321172855336e-06, | |
| "loss": 0.1804, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 6.51733502524495e-06, | |
| "loss": 0.184, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 6.471541587287003e-06, | |
| "loss": 0.186, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 6.425832509532652e-06, | |
| "loss": 0.167, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 6.380208884818104e-06, | |
| "loss": 0.1728, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 6.3346718039365076e-06, | |
| "loss": 0.1765, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 6.289222355611881e-06, | |
| "loss": 0.1813, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 6.243861626473073e-06, | |
| "loss": 0.1875, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 6.198590701027796e-06, | |
| "loss": 0.1829, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 6.153410661636683e-06, | |
| "loss": 0.1803, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 6.108322588487419e-06, | |
| "loss": 0.1768, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 6.063327559568908e-06, | |
| "loss": 0.1764, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 6.0184266506455125e-06, | |
| "loss": 0.1818, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 5.973620935231318e-06, | |
| "loss": 0.1834, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 5.928911484564481e-06, | |
| "loss": 0.1682, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 5.884299367581607e-06, | |
| "loss": 0.1828, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 5.8397856508922e-06, | |
| "loss": 0.1802, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 5.795371398753153e-06, | |
| "loss": 0.1949, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 5.751057673043316e-06, | |
| "loss": 0.1777, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 5.706845533238097e-06, | |
| "loss": 0.1728, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 5.662736036384142e-06, | |
| "loss": 0.1701, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 5.618730237074048e-06, | |
| "loss": 0.1667, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 5.574829187421166e-06, | |
| "loss": 0.1746, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 5.531033937034429e-06, | |
| "loss": 0.1827, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 5.4873455329932736e-06, | |
| "loss": 0.1769, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 5.443765019822593e-06, | |
| "loss": 0.1854, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 5.400293439467781e-06, | |
| "loss": 0.1921, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 5.356931831269798e-06, | |
| "loss": 0.1815, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 5.313681231940338e-06, | |
| "loss": 0.1781, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 5.270542675537034e-06, | |
| "loss": 0.2022, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 5.227517193438746e-06, | |
| "loss": 0.1866, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 5.184605814320889e-06, | |
| "loss": 0.1754, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 5.141809564130847e-06, | |
| "loss": 0.1745, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 5.099129466063444e-06, | |
| "loss": 0.1803, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 5.056566540536476e-06, | |
| "loss": 0.1678, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 5.014121805166321e-06, | |
| "loss": 0.1702, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 4.971796274743601e-06, | |
| "loss": 0.1313, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 4.9295909612089265e-06, | |
| "loss": 0.0643, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 4.887506873628708e-06, | |
| "loss": 0.0624, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 4.845545018171013e-06, | |
| "loss": 0.0604, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 4.80370639808152e-06, | |
| "loss": 0.0648, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 4.7619920136595465e-06, | |
| "loss": 0.0731, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 4.720402862234105e-06, | |
| "loss": 0.0582, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 4.678939938140079e-06, | |
| "loss": 0.0601, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 4.637604232694441e-06, | |
| "loss": 0.0527, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 4.596396734172559e-06, | |
| "loss": 0.0575, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 4.555318427784561e-06, | |
| "loss": 0.0578, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 4.514370295651781e-06, | |
| "loss": 0.0543, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 4.473553316783282e-06, | |
| "loss": 0.0547, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 4.432868467052449e-06, | |
| "loss": 0.053, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 4.392316719173651e-06, | |
| "loss": 0.0587, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 4.351899042678993e-06, | |
| "loss": 0.0628, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 4.311616403895126e-06, | |
| "loss": 0.0582, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 4.271469765920163e-06, | |
| "loss": 0.0578, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 4.231460088600626e-06, | |
| "loss": 0.064, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 4.191588328508518e-06, | |
| "loss": 0.0525, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 4.1518554389184416e-06, | |
| "loss": 0.0584, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 4.1122623697848164e-06, | |
| "loss": 0.0621, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 4.0728100677191585e-06, | |
| "loss": 0.0563, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 4.033499475967451e-06, | |
| "loss": 0.0598, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.994331534387602e-06, | |
| "loss": 0.0528, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.95530717942696e-06, | |
| "loss": 0.0588, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.916427344099928e-06, | |
| "loss": 0.0668, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.877692957965663e-06, | |
| "loss": 0.0569, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.839104947105847e-06, | |
| "loss": 0.0588, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.8006642341025456e-06, | |
| "loss": 0.0594, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 3.762371738016153e-06, | |
| "loss": 0.059, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.72422837436341e-06, | |
| "loss": 0.0523, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 3.686235055095536e-06, | |
| "loss": 0.0538, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.648392688576401e-06, | |
| "loss": 0.0586, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.610702179560821e-06, | |
| "loss": 0.055, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.573164429172924e-06, | |
| "loss": 0.0524, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.5357803348846087e-06, | |
| "loss": 0.0618, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.498550790494083e-06, | |
| "loss": 0.0527, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.461476686104495e-06, | |
| "loss": 0.0541, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.424558908102653e-06, | |
| "loss": 0.0579, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.387798339137837e-06, | |
| "loss": 0.0567, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.3511958581006874e-06, | |
| "loss": 0.0519, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.314752340102201e-06, | |
| "loss": 0.0573, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.278468656452798e-06, | |
| "loss": 0.061, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.242345674641508e-06, | |
| "loss": 0.0611, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.2063842583152095e-06, | |
| "loss": 0.0604, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.1705852672579853e-06, | |
| "loss": 0.0556, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.134949557370587e-06, | |
| "loss": 0.0557, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.099477980649941e-06, | |
| "loss": 0.0539, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.0641713851687994e-06, | |
| "loss": 0.061, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.0290306150554573e-06, | |
| "loss": 0.0566, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 2.994056510473571e-06, | |
| "loss": 0.0631, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 2.959249907602071e-06, | |
| "loss": 0.052, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 2.9246116386151704e-06, | |
| "loss": 0.0553, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 2.890142531662471e-06, | |
| "loss": 0.0578, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 2.8558434108491585e-06, | |
| "loss": 0.0522, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 2.8217150962163044e-06, | |
| "loss": 0.0575, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 2.7877584037212555e-06, | |
| "loss": 0.0615, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 2.75397414521813e-06, | |
| "loss": 0.0512, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 2.720363128438408e-06, | |
| "loss": 0.0595, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 2.6869261569716134e-06, | |
| "loss": 0.0557, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 2.6536640302461036e-06, | |
| "loss": 0.0605, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 2.6205775435099624e-06, | |
| "loss": 0.0548, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 2.5876674878119735e-06, | |
| "loss": 0.0544, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 2.554934649982731e-06, | |
| "loss": 0.0566, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.5223798126158004e-06, | |
| "loss": 0.055, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.490003754049024e-06, | |
| "loss": 0.0545, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.457807248345908e-06, | |
| "loss": 0.0611, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.425791065277119e-06, | |
| "loss": 0.0558, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.393955970302072e-06, | |
| "loss": 0.0699, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 2.362302724550639e-06, | |
| "loss": 0.0589, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 2.3308320848049436e-06, | |
| "loss": 0.0584, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 2.299544803481274e-06, | |
| "loss": 0.0524, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 2.2684416286120846e-06, | |
| "loss": 0.0595, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 2.23752330382813e-06, | |
| "loss": 0.0551, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 2.20679056834066e-06, | |
| "loss": 0.0523, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 2.176244156923768e-06, | |
| "loss": 0.0913, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 2.1458847998968123e-06, | |
| "loss": 0.0561, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 2.115713223106959e-06, | |
| "loss": 0.0562, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 2.0857301479118276e-06, | |
| "loss": 0.0574, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 2.0559362911622438e-06, | |
| "loss": 0.0641, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 2.026332365185102e-06, | |
| "loss": 0.0569, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.996919077766334e-06, | |
| "loss": 0.0656, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.967697132133981e-06, | |
| "loss": 0.0508, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.9386672269413976e-06, | |
| "loss": 0.0533, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.9098300562505266e-06, | |
| "loss": 0.0555, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.8811863095153182e-06, | |
| "loss": 0.0522, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.852736671565244e-06, | |
| "loss": 0.0541, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.8244818225889183e-06, | |
| "loss": 0.053, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.7964224381178474e-06, | |
| "loss": 0.0514, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.768559189010267e-06, | |
| "loss": 0.0513, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.7408927414351051e-06, | |
| "loss": 0.0546, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.7134237568560619e-06, | |
| "loss": 0.0515, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.6861528920157877e-06, | |
| "loss": 0.0559, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.6590807989201841e-06, | |
| "loss": 0.0594, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.632208124822815e-06, | |
| "loss": 0.0527, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.6055355122094352e-06, | |
| "loss": 0.0503, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.579063598782622e-06, | |
| "loss": 0.0534, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.5527930174465356e-06, | |
| "loss": 0.0639, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.5267243962917833e-06, | |
| "loss": 0.0575, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 1.5008583585804048e-06, | |
| "loss": 0.052, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 1.4751955227309722e-06, | |
| "loss": 0.0532, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.4497365023038012e-06, | |
| "loss": 0.0542, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.4244819059862824e-06, | |
| "loss": 0.0525, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 1.399432337578327e-06, | |
| "loss": 0.0588, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 1.3745883959779415e-06, | |
| "loss": 0.0552, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.3499506751668933e-06, | |
| "loss": 0.0535, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.325519764196519e-06, | |
| "loss": 0.0496, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 1.301296247173638e-06, | |
| "loss": 0.0536, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 1.2772807032465895e-06, | |
| "loss": 0.0546, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 1.2534737065913839e-06, | |
| "loss": 0.062, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.229875826397976e-06, | |
| "loss": 0.0482, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.2064876268566572e-06, | |
| "loss": 0.0526, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 1.1833096671445644e-06, | |
| "loss": 0.0513, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 1.1603425014123126e-06, | |
| "loss": 0.06, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.1375866787707435e-06, | |
| "loss": 0.0553, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.1150427432778078e-06, | |
| "loss": 0.0504, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 1.0927112339255374e-06, | |
| "loss": 0.0512, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 1.0705926846271787e-06, | |
| "loss": 0.05, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.0486876242044153e-06, | |
| "loss": 0.0577, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 1.0269965763747292e-06, | |
| "loss": 0.0533, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 1.0055200597388793e-06, | |
| "loss": 0.0556, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 9.84258587768504e-07, | |
| "loss": 0.0501, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 9.632126687938392e-07, | |
| "loss": 0.0472, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 9.423828059915685e-07, | |
| "loss": 0.0637, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 9.217694973728009e-07, | |
| "loss": 0.0508, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 9.013732357711469e-07, | |
| "loss": 0.0614, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 8.811945088309493e-07, | |
| "loss": 0.0534, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 8.612337989956199e-07, | |
| "loss": 0.0569, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 8.414915834961035e-07, | |
| "loss": 0.053, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 8.219683343394691e-07, | |
| "loss": 0.0554, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 8.0266451829763e-07, | |
| "loss": 0.0538, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 7.835805968961762e-07, | |
| "loss": 0.0529, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 7.647170264033422e-07, | |
| "loss": 0.0535, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 7.460742578191016e-07, | |
| "loss": 0.0457, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 7.276527368643793e-07, | |
| "loss": 0.0531, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 7.094529039704013e-07, | |
| "loss": 0.052, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.914751942681585e-07, | |
| "loss": 0.0527, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.737200375780073e-07, | |
| "loss": 0.0555, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.561878583993897e-07, | |
| "loss": 0.0502, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.388790759006902e-07, | |
| "loss": 0.0502, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.217941039092068e-07, | |
| "loss": 0.0602, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.049333509012611e-07, | |
| "loss": 0.0564, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.882972199924353e-07, | |
| "loss": 0.0578, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.718861089279249e-07, | |
| "loss": 0.0553, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.557004100730357e-07, | |
| "loss": 0.0531, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.39740510403809e-07, | |
| "loss": 0.0499, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.240067914977554e-07, | |
| "loss": 0.054, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.084996295247402e-07, | |
| "loss": 0.0512, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.932193952379915e-07, | |
| "loss": 0.0537, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.781664539652319e-07, | |
| "loss": 0.0541, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.633411655999431e-07, | |
| "loss": 0.0515, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.487438845927683e-07, | |
| "loss": 0.054, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.34374959943028e-07, | |
| "loss": 0.0516, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.202347351903857e-07, | |
| "loss": 0.0551, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.063235484066275e-07, | |
| "loss": 0.0498, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 3.9264173218758083e-07, | |
| "loss": 0.0523, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 3.791896136451656e-07, | |
| "loss": 0.0535, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.6596751439957003e-07, | |
| "loss": 0.0515, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.5297575057156255e-07, | |
| "loss": 0.0642, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.4021463277493337e-07, | |
| "loss": 0.0576, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.2768446610906834e-07, | |
| "loss": 0.0512, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.153855501516545e-07, | |
| "loss": 0.0497, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.0331817895151827e-07, | |
| "loss": 0.0493, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.9148264102159316e-07, | |
| "loss": 0.0526, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.7987921933202655e-07, | |
| "loss": 0.0616, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.685081913034082e-07, | |
| "loss": 0.0558, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.573698288001403e-07, | |
| "loss": 0.0537, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.46464398123939e-07, | |
| "loss": 0.0549, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.3579216000746418e-07, | |
| "loss": 0.0523, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.2535336960809118e-07, | |
| "loss": 0.0558, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.1514827650180425e-07, | |
| "loss": 0.0488, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.051771246772305e-07, | |
| "loss": 0.052, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 1.954401525298144e-07, | |
| "loss": 0.0518, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 1.859375928561058e-07, | |
| "loss": 0.0477, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 1.7666967284820202e-07, | |
| "loss": 0.055, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 1.6763661408831677e-07, | |
| "loss": 0.0534, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 1.5883863254347653e-07, | |
| "loss": 0.0522, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 1.5027593856036137e-07, | |
| "loss": 0.0518, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 1.4194873686027566e-07, | |
| "loss": 0.0484, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 1.3385722653425304e-07, | |
| "loss": 0.0571, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.2600160103829584e-07, | |
| "loss": 0.048, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.1838204818874877e-07, | |
| "loss": 0.0521, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.1099875015781359e-07, | |
| "loss": 0.0546, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 1.0385188346918485e-07, | |
| "loss": 0.0492, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 9.694161899383992e-08, | |
| "loss": 0.0555, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 9.026812194594448e-08, | |
| "loss": 0.0494, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 8.383155187890901e-08, | |
| "loss": 0.0527, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 7.763206268156964e-08, | |
| "loss": 0.0527, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 7.166980257451106e-08, | |
| "loss": 0.0618, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 6.594491410652493e-08, | |
| "loss": 0.0506, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 6.045753415119593e-08, | |
| "loss": 0.0577, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 5.520779390363551e-08, | |
| "loss": 0.0495, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 5.019581887733993e-08, | |
| "loss": 0.0563, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 4.542172890119267e-08, | |
| "loss": 0.0556, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 4.0885638116601176e-08, | |
| "loss": 0.0518, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 3.6587654974761246e-08, | |
| "loss": 0.0517, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 3.252788223407244e-08, | |
| "loss": 0.0531, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.870641695767451e-08, | |
| "loss": 0.0525, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.5123350511129242e-08, | |
| "loss": 0.0609, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.177876856023997e-08, | |
| "loss": 0.0578, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.8672751068995464e-08, | |
| "loss": 0.0544, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.5805372297662546e-08, | |
| "loss": 0.0557, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 1.3176700801014186e-08, | |
| "loss": 0.0509, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 1.0786799426683037e-08, | |
| "loss": 0.0564, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 8.635725313663745e-09, | |
| "loss": 0.055, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.723529890946268e-09, | |
| "loss": 0.0494, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.05025887628352e-09, | |
| "loss": 0.0501, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 3.615952275104473e-09, | |
| "loss": 0.0505, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.420644379549364e-09, | |
| "loss": 0.0515, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 1.4643637676559074e-09, | |
| "loss": 0.2096, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 7.471333026742856e-10, | |
| "loss": 0.0539, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.689701325209182e-10, | |
| "loss": 0.0536, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.988568936768132e-11, | |
| "loss": 0.0512, | |
| "step": 1286 | |
| } | |
| ], | |
| "max_steps": 1287, | |
| "num_train_epochs": 3, | |
| "total_flos": 999035450556416.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
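
The record above is a `Trainer` state log whose `log_history` entries each carry `step`, `epoch`, `learning_rate`, and `loss`, decaying to near-zero learning rate at `max_steps` = 1287. As a minimal sketch (not part of the original log), the Python below shows one way to parse such a file and plot loss and learning rate against step; the path `trainer_state.json` and the `matplotlib` dependency are assumptions, while the field names are taken from the log itself.

```python
import json

import matplotlib.pyplot as plt

# Load the Trainer state shown above. The path is an assumption;
# Hugging Face Trainer checkpoints typically include a trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss (some logs also
# contain eval-only entries without a "loss" key).
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]
lrs = [e["learning_rate"] for e in entries]

fig, ax1 = plt.subplots()
ax1.plot(steps, losses, label="loss")
ax1.set_xlabel("step")
ax1.set_ylabel("loss")

# The learning rate spans many orders of magnitude near the end of
# the schedule, so a log scale on a twin axis reads more clearly.
ax2 = ax1.twinx()
ax2.plot(steps, lrs, color="tab:orange", label="learning_rate")
ax2.set_yscale("log")
ax2.set_ylabel("learning_rate")

fig.legend(loc="upper right")
plt.show()
```

A plot like this makes schedule and loss anomalies (for example, the isolated loss spikes at steps 1012 and 1280 in this log) easy to spot without reading the raw JSON.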