Training in progress, step 950000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c2cdf0990859411fb7c85ca63e432d10f1471e48dc5f4dc74184b1445318034
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79b1f53feeac1b0edb668de9a470df4f2aa602aafbbbab02b19fa387a049f810
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b45d64f58ccbc19a103ee2b486e3ae0d8fd8e258fc7af4c2eaad0b83f3fc572a
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37baaaf1d34b48eab4b9f1b1e6566c4b0dfab731d43bb497206f05b08fc421b1
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cabc42515409358ec344dd617c3827e15301aec86dd40b0703aaa747b9ab648c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a0e8f2e6be0cbf1f9833f696c2eada7987f3d4cdaf496d37f24cbf254d548cb
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47f83c0e7dd1b3e03445f4411dbb9c9cc1bcbb9c018fe7bb512c0dfe29ba0b84
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0346809d8d3cd0e408dd0cf4407790a6097435d9d23dfae50689beef17f52894
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e01ce41a891cf7dcd8a18eccba168a8c04bb813917e1e626e5c83157e4ba5c0
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c9b4e343a2af7c2bb37729c1a96b7743275839cf7669689259960b84916a4f7
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ad6f8d9094ac28168658283f3ee5d2511e53f4b22c1d6e5c9b4e90d7a8c2ccb
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11286,11 +11286,131 @@
|
|
| 11286 |
"learning_rate": 1.1373402388763346e-05,
|
| 11287 |
"loss": 0.2834,
|
| 11288 |
"step": 940000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11289 |
}
|
| 11290 |
],
|
| 11291 |
"max_steps": 1000000,
|
| 11292 |
"num_train_epochs": 2,
|
| 11293 |
-
"total_flos": 6.
|
| 11294 |
"trial_name": null,
|
| 11295 |
"trial_params": null
|
| 11296 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.4384531373314946,
|
| 5 |
+
"global_step": 950000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11286 |
"learning_rate": 1.1373402388763346e-05,
|
| 11287 |
"loss": 0.2834,
|
| 11288 |
"step": 940000
|
| 11289 |
+
},
|
| 11290 |
+
{
|
| 11291 |
+
"epoch": 1.42,
|
| 11292 |
+
"learning_rate": 1.1350681316526965e-05,
|
| 11293 |
+
"loss": 0.2837,
|
| 11294 |
+
"step": 940500
|
| 11295 |
+
},
|
| 11296 |
+
{
|
| 11297 |
+
"epoch": 1.42,
|
| 11298 |
+
"learning_rate": 1.1328147928906494e-05,
|
| 11299 |
+
"loss": 0.2842,
|
| 11300 |
+
"step": 941000
|
| 11301 |
+
},
|
| 11302 |
+
{
|
| 11303 |
+
"epoch": 1.42,
|
| 11304 |
+
"learning_rate": 1.1305802287507358e-05,
|
| 11305 |
+
"loss": 0.2839,
|
| 11306 |
+
"step": 941500
|
| 11307 |
+
},
|
| 11308 |
+
{
|
| 11309 |
+
"epoch": 1.42,
|
| 11310 |
+
"learning_rate": 1.1283644453421678e-05,
|
| 11311 |
+
"loss": 0.284,
|
| 11312 |
+
"step": 942000
|
| 11313 |
+
},
|
| 11314 |
+
{
|
| 11315 |
+
"epoch": 1.42,
|
| 11316 |
+
"learning_rate": 1.1261674487228149e-05,
|
| 11317 |
+
"loss": 0.2839,
|
| 11318 |
+
"step": 942500
|
| 11319 |
+
},
|
| 11320 |
+
{
|
| 11321 |
+
"epoch": 1.42,
|
| 11322 |
+
"learning_rate": 1.1239892448991798e-05,
|
| 11323 |
+
"loss": 0.2834,
|
| 11324 |
+
"step": 943000
|
| 11325 |
+
},
|
| 11326 |
+
{
|
| 11327 |
+
"epoch": 1.43,
|
| 11328 |
+
"learning_rate": 1.1218298398263894e-05,
|
| 11329 |
+
"loss": 0.2833,
|
| 11330 |
+
"step": 943500
|
| 11331 |
+
},
|
| 11332 |
+
{
|
| 11333 |
+
"epoch": 1.43,
|
| 11334 |
+
"learning_rate": 1.1196892394081743e-05,
|
| 11335 |
+
"loss": 0.284,
|
| 11336 |
+
"step": 944000
|
| 11337 |
+
},
|
| 11338 |
+
{
|
| 11339 |
+
"epoch": 1.43,
|
| 11340 |
+
"learning_rate": 1.1175674494968552e-05,
|
| 11341 |
+
"loss": 0.2833,
|
| 11342 |
+
"step": 944500
|
| 11343 |
+
},
|
| 11344 |
+
{
|
| 11345 |
+
"epoch": 1.43,
|
| 11346 |
+
"learning_rate": 1.1154644758933235e-05,
|
| 11347 |
+
"loss": 0.2835,
|
| 11348 |
+
"step": 945000
|
| 11349 |
+
},
|
| 11350 |
+
{
|
| 11351 |
+
"epoch": 1.43,
|
| 11352 |
+
"learning_rate": 1.11338032434703e-05,
|
| 11353 |
+
"loss": 0.2832,
|
| 11354 |
+
"step": 945500
|
| 11355 |
+
},
|
| 11356 |
+
{
|
| 11357 |
+
"epoch": 1.43,
|
| 11358 |
+
"learning_rate": 1.1113150005559644e-05,
|
| 11359 |
+
"loss": 0.2836,
|
| 11360 |
+
"step": 946000
|
| 11361 |
+
},
|
| 11362 |
+
{
|
| 11363 |
+
"epoch": 1.43,
|
| 11364 |
+
"learning_rate": 1.1092685101666438e-05,
|
| 11365 |
+
"loss": 0.284,
|
| 11366 |
+
"step": 946500
|
| 11367 |
+
},
|
| 11368 |
+
{
|
| 11369 |
+
"epoch": 1.43,
|
| 11370 |
+
"learning_rate": 1.1072408587740942e-05,
|
| 11371 |
+
"loss": 0.2836,
|
| 11372 |
+
"step": 947000
|
| 11373 |
+
},
|
| 11374 |
+
{
|
| 11375 |
+
"epoch": 1.43,
|
| 11376 |
+
"learning_rate": 1.1052320519218383e-05,
|
| 11377 |
+
"loss": 0.2833,
|
| 11378 |
+
"step": 947500
|
| 11379 |
+
},
|
| 11380 |
+
{
|
| 11381 |
+
"epoch": 1.43,
|
| 11382 |
+
"learning_rate": 1.1032420951018755e-05,
|
| 11383 |
+
"loss": 0.2841,
|
| 11384 |
+
"step": 948000
|
| 11385 |
+
},
|
| 11386 |
+
{
|
| 11387 |
+
"epoch": 1.44,
|
| 11388 |
+
"learning_rate": 1.1012709937546722e-05,
|
| 11389 |
+
"loss": 0.2837,
|
| 11390 |
+
"step": 948500
|
| 11391 |
+
},
|
| 11392 |
+
{
|
| 11393 |
+
"epoch": 1.44,
|
| 11394 |
+
"learning_rate": 1.0993187532691458e-05,
|
| 11395 |
+
"loss": 0.2844,
|
| 11396 |
+
"step": 949000
|
| 11397 |
+
},
|
| 11398 |
+
{
|
| 11399 |
+
"epoch": 1.44,
|
| 11400 |
+
"learning_rate": 1.0973853789826454e-05,
|
| 11401 |
+
"loss": 0.2842,
|
| 11402 |
+
"step": 949500
|
| 11403 |
+
},
|
| 11404 |
+
{
|
| 11405 |
+
"epoch": 1.44,
|
| 11406 |
+
"learning_rate": 1.0954708761809438e-05,
|
| 11407 |
+
"loss": 0.2843,
|
| 11408 |
+
"step": 950000
|
| 11409 |
}
|
| 11410 |
],
|
| 11411 |
"max_steps": 1000000,
|
| 11412 |
"num_train_epochs": 2,
|
| 11413 |
+
"total_flos": 6.422672857591212e+22,
|
| 11414 |
"trial_name": null,
|
| 11415 |
"trial_params": null
|
| 11416 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79b1f53feeac1b0edb668de9a470df4f2aa602aafbbbab02b19fa387a049f810
|
| 3 |
size 449450757
|