Training in progress, step 330000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- last-checkpoint/training_args.bin +2 -2
- pytorch_model.bin +1 -1
- training_args.bin +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7d2a5e73283aa1880b29f29feb6316701d271842328e195271c3e399e6c920a
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fb067e8d99d964806b68ae99e0f39bdecb3dd4f00cbe7958a115e1392dffcc7
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:623a6385c6947219614a206f325f0d5bbe602621d1fd3e48972b7fdd72be25d2
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02ac5973a9776fb5558d0d5471ce91f9f5f08200895aa832ab0411732da0bb62
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbe8a6d91f7528a2132dc105f5a011209849ce078b5ae84cd752340614efc89b
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57da30fd98664b01593e067ac432f0638ebe9389fdcddf47146e4e5d78f9b45b
|
| 3 |
+
size 14439
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41ab9ece5cf2fe3d99e3bf57fddd4aebe29db0bd41b88067fc2fa8ae9ef5bedb
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -2374,11 +2374,85 @@
|
|
| 2374 |
"eval_samples_per_second": 961.537,
|
| 2375 |
"eval_steps_per_second": 15.385,
|
| 2376 |
"step": 320000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2377 |
}
|
| 2378 |
],
|
| 2379 |
"max_steps": 1000000,
|
| 2380 |
"num_train_epochs": 16,
|
| 2381 |
-
"total_flos": 2.
|
| 2382 |
"trial_name": null,
|
| 2383 |
"trial_params": null
|
| 2384 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 5.039168079160749,
|
| 5 |
+
"global_step": 330000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 2374 |
"eval_samples_per_second": 961.537,
|
| 2375 |
"eval_steps_per_second": 15.385,
|
| 2376 |
"step": 320000
|
| 2377 |
+
},
|
| 2378 |
+
{
|
| 2379 |
+
"epoch": 4.9,
|
| 2380 |
+
"learning_rate": 0.00012372180635716656,
|
| 2381 |
+
"loss": 0.2874,
|
| 2382 |
+
"step": 321000
|
| 2383 |
+
},
|
| 2384 |
+
{
|
| 2385 |
+
"epoch": 4.92,
|
| 2386 |
+
"learning_rate": 0.00012354078948651604,
|
| 2387 |
+
"loss": 0.2873,
|
| 2388 |
+
"step": 322000
|
| 2389 |
+
},
|
| 2390 |
+
{
|
| 2391 |
+
"epoch": 4.93,
|
| 2392 |
+
"learning_rate": 0.00012335929646076758,
|
| 2393 |
+
"loss": 0.2868,
|
| 2394 |
+
"step": 323000
|
| 2395 |
+
},
|
| 2396 |
+
{
|
| 2397 |
+
"epoch": 4.95,
|
| 2398 |
+
"learning_rate": 0.00012317732926469976,
|
| 2399 |
+
"loss": 0.2871,
|
| 2400 |
+
"step": 324000
|
| 2401 |
+
},
|
| 2402 |
+
{
|
| 2403 |
+
"epoch": 4.96,
|
| 2404 |
+
"learning_rate": 0.00012299488988827675,
|
| 2405 |
+
"loss": 0.2869,
|
| 2406 |
+
"step": 325000
|
| 2407 |
+
},
|
| 2408 |
+
{
|
| 2409 |
+
"epoch": 4.96,
|
| 2410 |
+
"eval_runtime": 1.3977,
|
| 2411 |
+
"eval_samples_per_second": 715.452,
|
| 2412 |
+
"eval_steps_per_second": 11.447,
|
| 2413 |
+
"step": 325000
|
| 2414 |
+
},
|
| 2415 |
+
{
|
| 2416 |
+
"epoch": 4.98,
|
| 2417 |
+
"learning_rate": 0.0001228119803266263,
|
| 2418 |
+
"loss": 0.2867,
|
| 2419 |
+
"step": 326000
|
| 2420 |
+
},
|
| 2421 |
+
{
|
| 2422 |
+
"epoch": 4.99,
|
| 2423 |
+
"learning_rate": 0.0001226286025800181,
|
| 2424 |
+
"loss": 0.2866,
|
| 2425 |
+
"step": 327000
|
| 2426 |
+
},
|
| 2427 |
+
{
|
| 2428 |
+
"epoch": 5.01,
|
| 2429 |
+
"learning_rate": 0.00012244475865384177,
|
| 2430 |
+
"loss": 0.2862,
|
| 2431 |
+
"step": 328000
|
| 2432 |
+
},
|
| 2433 |
+
{
|
| 2434 |
+
"epoch": 5.02,
|
| 2435 |
+
"learning_rate": 0.00012226045055858505,
|
| 2436 |
+
"loss": 0.2858,
|
| 2437 |
+
"step": 329000
|
| 2438 |
+
},
|
| 2439 |
+
{
|
| 2440 |
+
"epoch": 5.04,
|
| 2441 |
+
"learning_rate": 0.00012207568030981174,
|
| 2442 |
+
"loss": 0.2859,
|
| 2443 |
+
"step": 330000
|
| 2444 |
+
},
|
| 2445 |
+
{
|
| 2446 |
+
"epoch": 5.04,
|
| 2447 |
+
"eval_runtime": 1.1314,
|
| 2448 |
+
"eval_samples_per_second": 883.862,
|
| 2449 |
+
"eval_steps_per_second": 14.142,
|
| 2450 |
+
"step": 330000
|
| 2451 |
}
|
| 2452 |
],
|
| 2453 |
"max_steps": 1000000,
|
| 2454 |
"num_train_epochs": 16,
|
| 2455 |
+
"total_flos": 2.313305328660712e+22,
|
| 2456 |
"trial_name": null,
|
| 2457 |
"trial_params": null
|
| 2458 |
}
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69d9dd505c56ab02fdd4405a013eaa211c194bd407e1877bba9642905743e82c
|
| 3 |
+
size 3311
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fb067e8d99d964806b68ae99e0f39bdecb3dd4f00cbe7958a115e1392dffcc7
|
| 3 |
size 449471589
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69d9dd505c56ab02fdd4405a013eaa211c194bd407e1877bba9642905743e82c
|
| 3 |
+
size 3311
|