Training in progress, step 10500, checkpoint
last-checkpoint/model.safetensors  CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:289c5f7a117bc2146cbc4b2792b4927c7ce3188416b6d12c24b53c92eac18575
 size 328277848
last-checkpoint/optimizer.pt  CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:78aff07fca8298b71f09331247658b387bcda955f1390e18f38dbc6caf805220
 size 318646859
last-checkpoint/rng_state.pth  CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:88a0861f9132710b799b6fa2e167a1b0b3b522e3a288bf5f69138ff390819689
 size 14645
last-checkpoint/scheduler.pt  CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2f96c5626b64f285225e7bd0540a942ee4b22f3baba9f0a0f2189b039b8bf46c
 size 1465
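The four files above are stored through Git LFS, so each diff touches only the pointer: the sha256 oid changes while the recorded byte size stays the same. A minimal sketch of how one might check a locally downloaded blob against such a pointer; the file paths and helper names here are illustrative, not part of this repository.

import hashlib
import os

def read_lfs_pointer(path):
    # Parse the "oid sha256:<hex>" and "size <bytes>" fields of a Git LFS pointer file.
    fields = {}
    with open(path) as f:
        for line in f:
            if line.strip():
                key, _, value = line.strip().partition(" ")
                fields[key] = value
    return fields["oid"].split(":", 1)[1], int(fields["size"])

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file in chunks so large checkpoints do not need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local paths: the pointer as committed here, and the resolved blob.
expected_oid, expected_size = read_lfs_pointer("model.safetensors.pointer")
print("oid ok:", sha256_of("last-checkpoint/model.safetensors") == expected_oid)
print("size ok:", os.path.getsize("last-checkpoint/model.safetensors") == expected_size)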
last-checkpoint/trainer_state.json  CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.7739483020780538,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 10500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7175,6 +7175,364 @@
       "eval_samples_per_second": 273.932,
       "eval_steps_per_second": 5.753,
       "step": 10000
+    },
+    {
+      "epoch": 1.691164047981078,
+      "grad_norm": 0.47159892320632935,
+      "learning_rate": 2.4865303937104007e-05,
+      "loss": 4.28497314453125,
+      "step": 10010
+    },
+    {
+      "epoch": 1.6928535225544854,
+      "grad_norm": 0.45282673835754395,
+      "learning_rate": 2.460181551284876e-05,
+      "loss": 4.316118621826172,
+      "step": 10020
+    },
+    {
+      "epoch": 1.6945429971278934,
+      "grad_norm": 0.4685194492340088,
+      "learning_rate": 2.433960581310091e-05,
+      "loss": 4.295747375488281,
+      "step": 10030
+    },
+    {
+      "epoch": 1.6962324717013009,
+      "grad_norm": 0.4553631842136383,
+      "learning_rate": 2.4078677511694776e-05,
+      "loss": 4.325288391113281,
+      "step": 10040
+    },
+    {
+      "epoch": 1.6979219462747086,
+      "grad_norm": 0.46230271458625793,
+      "learning_rate": 2.381903326939777e-05,
+      "loss": 4.269796752929688,
+      "step": 10050
+    },
+    {
+      "epoch": 1.6996114208481163,
+      "grad_norm": 0.4607372283935547,
+      "learning_rate": 2.356067573388355e-05,
+      "loss": 4.311262893676758,
+      "step": 10060
+    },
+    {
+      "epoch": 1.7013008954215239,
+      "grad_norm": 0.4789498448371887,
+      "learning_rate": 2.3303607539704628e-05,
+      "loss": 4.303665542602539,
+      "step": 10070
+    },
+    {
+      "epoch": 1.7029903699949316,
+      "grad_norm": 0.46580952405929565,
+      "learning_rate": 2.3047831308265845e-05,
+      "loss": 4.283160400390625,
+      "step": 10080
+    },
+    {
+      "epoch": 1.7046798445683393,
+      "grad_norm": 0.4807932674884796,
+      "learning_rate": 2.2793349647797372e-05,
+      "loss": 4.308661270141601,
+      "step": 10090
+    },
+    {
+      "epoch": 1.7063693191417468,
+      "grad_norm": 0.4682171046733856,
+      "learning_rate": 2.2540165153328345e-05,
+      "loss": 4.298659896850586,
+      "step": 10100
+    },
+    {
+      "epoch": 1.7080587937151546,
+      "grad_norm": 0.46330752968788147,
+      "learning_rate": 2.2288280406660237e-05,
+      "loss": 4.294895935058594,
+      "step": 10110
+    },
+    {
+      "epoch": 1.7097482682885623,
+      "grad_norm": 0.4711052179336548,
+      "learning_rate": 2.2037697976340525e-05,
+      "loss": 4.325272750854492,
+      "step": 10120
+    },
+    {
+      "epoch": 1.7114377428619698,
+      "grad_norm": 0.47657638788223267,
+      "learning_rate": 2.1788420417636704e-05,
+      "loss": 4.280495834350586,
+      "step": 10130
+    },
+    {
+      "epoch": 1.7131272174353775,
+      "grad_norm": 0.45199576020240784,
+      "learning_rate": 2.1540450272509986e-05,
+      "loss": 4.289173889160156,
+      "step": 10140
+    },
+    {
+      "epoch": 1.7148166920087853,
+      "grad_norm": 0.4709782004356384,
+      "learning_rate": 2.129379006958944e-05,
+      "loss": 4.30334243774414,
+      "step": 10150
+    },
+    {
+      "epoch": 1.7165061665821928,
+      "grad_norm": 0.4583008289337158,
+      "learning_rate": 2.104844232414634e-05,
+      "loss": 4.33288459777832,
+      "step": 10160
+    },
+    {
+      "epoch": 1.7181956411556008,
+      "grad_norm": 0.4560486674308777,
+      "learning_rate": 2.080440953806844e-05,
+      "loss": 4.312181091308593,
+      "step": 10170
+    },
+    {
+      "epoch": 1.7198851157290083,
+      "grad_norm": 0.45241913199424744,
+      "learning_rate": 2.056169419983432e-05,
+      "loss": 4.302781677246093,
+      "step": 10180
+    },
+    {
+      "epoch": 1.721574590302416,
+      "grad_norm": 0.4574364125728607,
+      "learning_rate": 2.0320298784488177e-05,
+      "loss": 4.296425628662109,
+      "step": 10190
+    },
+    {
+      "epoch": 1.7232640648758237,
+      "grad_norm": 0.4723096489906311,
+      "learning_rate": 2.008022575361464e-05,
+      "loss": 4.3003795623779295,
+      "step": 10200
+    },
+    {
+      "epoch": 1.7249535394492312,
+      "grad_norm": 0.4733101427555084,
+      "learning_rate": 1.9841477555313428e-05,
+      "loss": 4.288959503173828,
+      "step": 10210
+    },
+    {
+      "epoch": 1.726643014022639,
+      "grad_norm": 0.4613873362541199,
+      "learning_rate": 1.960405662417458e-05,
+      "loss": 4.3150989532470705,
+      "step": 10220
+    },
+    {
+      "epoch": 1.7283324885960467,
+      "grad_norm": 0.47345536947250366,
+      "learning_rate": 1.9367965381253632e-05,
+      "loss": 4.287479400634766,
+      "step": 10230
+    },
+    {
+      "epoch": 1.7300219631694542,
+      "grad_norm": 0.4718509912490845,
+      "learning_rate": 1.9133206234046833e-05,
+      "loss": 4.321730422973633,
+      "step": 10240
+    },
+    {
+      "epoch": 1.731711437742862,
+      "grad_norm": 0.455735445022583,
+      "learning_rate": 1.8899781576466605e-05,
+      "loss": 4.2946220397949215,
+      "step": 10250
+    },
+    {
+      "epoch": 1.7334009123162697,
+      "grad_norm": 0.4965671896934509,
+      "learning_rate": 1.86676937888172e-05,
+      "loss": 4.301831817626953,
+      "step": 10260
+    },
+    {
+      "epoch": 1.7350903868896772,
+      "grad_norm": 0.4581054449081421,
+      "learning_rate": 1.8436945237770347e-05,
+      "loss": 4.305143737792969,
+      "step": 10270
+    },
+    {
+      "epoch": 1.736779861463085,
+      "grad_norm": 0.4591616094112396,
+      "learning_rate": 1.8207538276341255e-05,
+      "loss": 4.309583282470703,
+      "step": 10280
+    },
+    {
+      "epoch": 1.7384693360364927,
+      "grad_norm": 0.4735301733016968,
+      "learning_rate": 1.7979475243864422e-05,
+      "loss": 4.28990478515625,
+      "step": 10290
+    },
+    {
+      "epoch": 1.7401588106099002,
+      "grad_norm": 0.46391761302948,
+      "learning_rate": 1.7752758465969835e-05,
+      "loss": 4.2906452178955075,
+      "step": 10300
+    },
+    {
+      "epoch": 1.7418482851833081,
+      "grad_norm": 0.4546545147895813,
+      "learning_rate": 1.7527390254559564e-05,
+      "loss": 4.305644226074219,
+      "step": 10310
+    },
+    {
+      "epoch": 1.7435377597567157,
+      "grad_norm": 0.4470182955265045,
+      "learning_rate": 1.7303372907783646e-05,
+      "loss": 4.288211059570313,
+      "step": 10320
+    },
+    {
+      "epoch": 1.7452272343301232,
+      "grad_norm": 0.4606943726539612,
+      "learning_rate": 1.708070871001704e-05,
+      "loss": 4.294968795776367,
+      "step": 10330
+    },
+    {
+      "epoch": 1.7469167089035311,
+      "grad_norm": 0.4543667733669281,
+      "learning_rate": 1.6859399931836182e-05,
+      "loss": 4.301618194580078,
+      "step": 10340
+    },
+    {
+      "epoch": 1.7486061834769386,
+      "grad_norm": 0.472310870885849,
+      "learning_rate": 1.663944882999596e-05,
+      "loss": 4.318760681152344,
+      "step": 10350
+    },
+    {
+      "epoch": 1.7502956580503464,
+      "grad_norm": 0.44963911175727844,
+      "learning_rate": 1.6420857647406533e-05,
+      "loss": 4.308442687988281,
+      "step": 10360
+    },
+    {
+      "epoch": 1.751985132623754,
+      "grad_norm": 0.45367759466171265,
+      "learning_rate": 1.6203628613110513e-05,
+      "loss": 4.320900344848633,
+      "step": 10370
+    },
+    {
+      "epoch": 1.7536746071971616,
+      "grad_norm": 0.4687769114971161,
+      "learning_rate": 1.598776394226035e-05,
+      "loss": 4.342009735107422,
+      "step": 10380
+    },
+    {
+      "epoch": 1.7553640817705694,
+      "grad_norm": 0.4652376174926758,
+      "learning_rate": 1.5773265836095615e-05,
+      "loss": 4.283346557617188,
+      "step": 10390
+    },
+    {
+      "epoch": 1.757053556343977,
+      "grad_norm": 0.44677111506462097,
+      "learning_rate": 1.5560136481920583e-05,
+      "loss": 4.30499496459961,
+      "step": 10400
+    },
+    {
+      "epoch": 1.7587430309173846,
+      "grad_norm": 0.4536132514476776,
+      "learning_rate": 1.5348378053081885e-05,
+      "loss": 4.284192657470703,
+      "step": 10410
+    },
+    {
+      "epoch": 1.7604325054907923,
+      "grad_norm": 0.4781353175640106,
+      "learning_rate": 1.5137992708946522e-05,
+      "loss": 4.299782180786133,
+      "step": 10420
+    },
+    {
+      "epoch": 1.7621219800642,
+      "grad_norm": 0.46639731526374817,
+      "learning_rate": 1.4928982594879602e-05,
+      "loss": 4.301108169555664,
+      "step": 10430
+    },
+    {
+      "epoch": 1.7638114546376076,
+      "grad_norm": 0.4624445140361786,
+      "learning_rate": 1.4721349842222623e-05,
+      "loss": 4.283761596679687,
+      "step": 10440
+    },
+    {
+      "epoch": 1.7655009292110155,
+      "grad_norm": 0.47024649381637573,
+      "learning_rate": 1.4515096568271728e-05,
+      "loss": 4.299430084228516,
+      "step": 10450
+    },
+    {
+      "epoch": 1.767190403784423,
+      "grad_norm": 0.4745561182498932,
+      "learning_rate": 1.4310224876256071e-05,
+      "loss": 4.319121551513672,
+      "step": 10460
+    },
+    {
+      "epoch": 1.7688798783578306,
+      "grad_norm": 0.4728463292121887,
+      "learning_rate": 1.410673685531638e-05,
+      "loss": 4.306048583984375,
+      "step": 10470
+    },
+    {
+      "epoch": 1.7705693529312385,
+      "grad_norm": 0.460742712020874,
+      "learning_rate": 1.390463458048357e-05,
+      "loss": 4.31497802734375,
+      "step": 10480
+    },
+    {
+      "epoch": 1.772258827504646,
+      "grad_norm": 0.46218180656433105,
+      "learning_rate": 1.3703920112657856e-05,
+      "loss": 4.3034709930419925,
+      "step": 10490
+    },
+    {
+      "epoch": 1.7739483020780538,
+      "grad_norm": 0.46378350257873535,
+      "learning_rate": 1.3504595498587378e-05,
+      "loss": 4.3008544921875,
+      "step": 10500
+    },
+    {
+      "epoch": 1.7739483020780538,
+      "eval_loss": 4.253804683685303,
+      "eval_runtime": 3.6144,
+      "eval_samples_per_second": 276.668,
+      "eval_steps_per_second": 5.81,
+      "step": 10500
     }
   ],
   "logging_steps": 10,
@@ -7194,7 +7552,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
+  "total_flos": 3.511775715466936e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null
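The trainer_state.json change records 50 new training log entries (steps 10010 through 10500, logged every 10 steps) plus the evaluation at step 10500. A minimal sketch for inspecting the updated file, assuming the standard Transformers layout in which logged entries sit under "log_history"; the path is illustrative.

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("global_step:", state["global_step"], "epoch:", state["epoch"])

# Training entries carry "loss"; evaluation entries carry "eval_loss" instead.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print("last train loss:", train_logs[-1]["loss"], "at step", train_logs[-1]["step"])
if eval_logs:
    print("last eval loss:", eval_logs[-1]["eval_loss"], "at step", eval_logs[-1]["step"])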