Training in progress, step 860000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5c106db74e42dc51679adae4b838c3fc769d432ccf7ad2ad85b3ca08257bba7
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6b3810346b06651e6d4da88b6c7ab5a4c39c9ed73386ee5a2037c7eee7d792a
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91922deb8d5a3dcc119e5076f52f06901e3c1963deb40c98ef6a2a56110eecfe
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eda717afacbcc37a057674c94e7755da0f047a833361ec1cbdae8c11451a0ab4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5b4e2d79497f15fdd0b8e63be121802d33321a338ff2ad99cc0c2d94fed90e9
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f6dd21c66e6f1818612e884579b2f3fe6cd6395a735470cd934326c7e7a0170
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1af16532ed7776301ec2b0d23baf8c67ba74ec07e3f7e0782860705643ea3c80
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6296,11 +6296,85 @@
|
|
| 6296 |
"eval_samples_per_second": 1203.685,
|
| 6297 |
"eval_steps_per_second": 19.259,
|
| 6298 |
"step": 850000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6299 |
}
|
| 6300 |
],
|
| 6301 |
"max_steps": 1000000,
|
| 6302 |
"num_train_epochs": 16,
|
| 6303 |
-
"total_flos":
|
| 6304 |
"trial_name": null,
|
| 6305 |
"trial_params": null
|
| 6306 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 13.132377418418923,
|
| 5 |
+
"global_step": 860000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6296 |
"eval_samples_per_second": 1203.685,
|
| 6297 |
"eval_steps_per_second": 19.259,
|
| 6298 |
"step": 850000
|
| 6299 |
+
},
|
| 6300 |
+
{
|
| 6301 |
+
"epoch": 12.99,
|
| 6302 |
+
"learning_rate": 1.8326999187910095e-05,
|
| 6303 |
+
"loss": 0.2342,
|
| 6304 |
+
"step": 851000
|
| 6305 |
+
},
|
| 6306 |
+
{
|
| 6307 |
+
"epoch": 13.01,
|
| 6308 |
+
"learning_rate": 1.8217835406330415e-05,
|
| 6309 |
+
"loss": 0.2344,
|
| 6310 |
+
"step": 852000
|
| 6311 |
+
},
|
| 6312 |
+
{
|
| 6313 |
+
"epoch": 13.03,
|
| 6314 |
+
"learning_rate": 1.810934726460436e-05,
|
| 6315 |
+
"loss": 0.2328,
|
| 6316 |
+
"step": 853000
|
| 6317 |
+
},
|
| 6318 |
+
{
|
| 6319 |
+
"epoch": 13.04,
|
| 6320 |
+
"learning_rate": 1.800153594914084e-05,
|
| 6321 |
+
"loss": 0.2326,
|
| 6322 |
+
"step": 854000
|
| 6323 |
+
},
|
| 6324 |
+
{
|
| 6325 |
+
"epoch": 13.06,
|
| 6326 |
+
"learning_rate": 1.7894402638947176e-05,
|
| 6327 |
+
"loss": 0.2325,
|
| 6328 |
+
"step": 855000
|
| 6329 |
+
},
|
| 6330 |
+
{
|
| 6331 |
+
"epoch": 13.06,
|
| 6332 |
+
"eval_runtime": 0.7234,
|
| 6333 |
+
"eval_samples_per_second": 1382.419,
|
| 6334 |
+
"eval_steps_per_second": 22.119,
|
| 6335 |
+
"step": 855000
|
| 6336 |
+
},
|
| 6337 |
+
{
|
| 6338 |
+
"epoch": 13.07,
|
| 6339 |
+
"learning_rate": 1.778794850561604e-05,
|
| 6340 |
+
"loss": 0.2327,
|
| 6341 |
+
"step": 856000
|
| 6342 |
+
},
|
| 6343 |
+
{
|
| 6344 |
+
"epoch": 13.09,
|
| 6345 |
+
"learning_rate": 1.7682174713312805e-05,
|
| 6346 |
+
"loss": 0.2326,
|
| 6347 |
+
"step": 857000
|
| 6348 |
+
},
|
| 6349 |
+
{
|
| 6350 |
+
"epoch": 13.1,
|
| 6351 |
+
"learning_rate": 1.75770824187627e-05,
|
| 6352 |
+
"loss": 0.2325,
|
| 6353 |
+
"step": 858000
|
| 6354 |
+
},
|
| 6355 |
+
{
|
| 6356 |
+
"epoch": 13.12,
|
| 6357 |
+
"learning_rate": 1.747267277123821e-05,
|
| 6358 |
+
"loss": 0.2327,
|
| 6359 |
+
"step": 859000
|
| 6360 |
+
},
|
| 6361 |
+
{
|
| 6362 |
+
"epoch": 13.13,
|
| 6363 |
+
"learning_rate": 1.7368946912546556e-05,
|
| 6364 |
+
"loss": 0.2329,
|
| 6365 |
+
"step": 860000
|
| 6366 |
+
},
|
| 6367 |
+
{
|
| 6368 |
+
"epoch": 13.13,
|
| 6369 |
+
"eval_runtime": 0.7568,
|
| 6370 |
+
"eval_samples_per_second": 1321.327,
|
| 6371 |
+
"eval_steps_per_second": 21.141,
|
| 6372 |
+
"step": 860000
|
| 6373 |
}
|
| 6374 |
],
|
| 6375 |
"max_steps": 1000000,
|
| 6376 |
"num_train_epochs": 16,
|
| 6377 |
+
"total_flos": 6.028613906723921e+22,
|
| 6378 |
"trial_name": null,
|
| 6379 |
"trial_params": null
|
| 6380 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6b3810346b06651e6d4da88b6c7ab5a4c39c9ed73386ee5a2037c7eee7d792a
|
| 3 |
size 449471589
|