Training in progress, step 720000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a026519ac0ff0c3f1289d77d525568015b7857390a799b13b0638f4259cfacf4
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1129389ce9bcf343c0f3cd3aed67df3cad67444a99046ba4741058e82d747211
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aae7d49de4956e7c91d479fafd3b4d2ed56ba19e47ab8cacf05f2f824d1b2a28
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cebea51ed6ff0008f8a6cacb32ade3887e6067fbd934b29f2ec5132ff8c8883a
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b973a3a6eadc1982d4cc67d2b41bd0f42e96e285ea7f0a54482ed3274795569d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41cae4c446a75e16a0da874d105e34b48768289141d592a014669a4b78d9fe62
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed967cda4343e3de3b9b5189aaee239028fc09bd89309a5564f84c1374d13bfc
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5260,11 +5260,85 @@
|
|
| 5260 |
"eval_samples_per_second": 1338.326,
|
| 5261 |
"eval_steps_per_second": 21.413,
|
| 5262 |
"step": 710000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5263 |
}
|
| 5264 |
],
|
| 5265 |
"max_steps": 1000000,
|
| 5266 |
"num_train_epochs": 16,
|
| 5267 |
-
"total_flos":
|
| 5268 |
"trial_name": null,
|
| 5269 |
"trial_params": null
|
| 5270 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.994548536350726,
|
| 5 |
+
"global_step": 720000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5260 |
"eval_samples_per_second": 1338.326,
|
| 5261 |
"eval_steps_per_second": 21.413,
|
| 5262 |
"step": 710000
|
| 5263 |
+
},
|
| 5264 |
+
{
|
| 5265 |
+
"epoch": 10.86,
|
| 5266 |
+
"learning_rate": 3.960771378517049e-05,
|
| 5267 |
+
"loss": 0.2438,
|
| 5268 |
+
"step": 711000
|
| 5269 |
+
},
|
| 5270 |
+
{
|
| 5271 |
+
"epoch": 10.87,
|
| 5272 |
+
"learning_rate": 3.941887568171766e-05,
|
| 5273 |
+
"loss": 0.2464,
|
| 5274 |
+
"step": 712000
|
| 5275 |
+
},
|
| 5276 |
+
{
|
| 5277 |
+
"epoch": 10.89,
|
| 5278 |
+
"learning_rate": 3.923048136693873e-05,
|
| 5279 |
+
"loss": 0.2445,
|
| 5280 |
+
"step": 713000
|
| 5281 |
+
},
|
| 5282 |
+
{
|
| 5283 |
+
"epoch": 10.9,
|
| 5284 |
+
"learning_rate": 3.904253290108369e-05,
|
| 5285 |
+
"loss": 0.2435,
|
| 5286 |
+
"step": 714000
|
| 5287 |
+
},
|
| 5288 |
+
{
|
| 5289 |
+
"epoch": 10.92,
|
| 5290 |
+
"learning_rate": 3.885503233952689e-05,
|
| 5291 |
+
"loss": 0.2446,
|
| 5292 |
+
"step": 715000
|
| 5293 |
+
},
|
| 5294 |
+
{
|
| 5295 |
+
"epoch": 10.92,
|
| 5296 |
+
"eval_runtime": 0.8432,
|
| 5297 |
+
"eval_samples_per_second": 1186.017,
|
| 5298 |
+
"eval_steps_per_second": 18.976,
|
| 5299 |
+
"step": 715000
|
| 5300 |
+
},
|
| 5301 |
+
{
|
| 5302 |
+
"epoch": 10.93,
|
| 5303 |
+
"learning_rate": 3.86679817327444e-05,
|
| 5304 |
+
"loss": 0.2432,
|
| 5305 |
+
"step": 716000
|
| 5306 |
+
},
|
| 5307 |
+
{
|
| 5308 |
+
"epoch": 10.95,
|
| 5309 |
+
"learning_rate": 3.848138312629171e-05,
|
| 5310 |
+
"loss": 0.2433,
|
| 5311 |
+
"step": 717000
|
| 5312 |
+
},
|
| 5313 |
+
{
|
| 5314 |
+
"epoch": 10.96,
|
| 5315 |
+
"learning_rate": 3.8295238560781317e-05,
|
| 5316 |
+
"loss": 0.2436,
|
| 5317 |
+
"step": 718000
|
| 5318 |
+
},
|
| 5319 |
+
{
|
| 5320 |
+
"epoch": 10.98,
|
| 5321 |
+
"learning_rate": 3.810955007186029e-05,
|
| 5322 |
+
"loss": 0.2433,
|
| 5323 |
+
"step": 719000
|
| 5324 |
+
},
|
| 5325 |
+
{
|
| 5326 |
+
"epoch": 10.99,
|
| 5327 |
+
"learning_rate": 3.792431969018824e-05,
|
| 5328 |
+
"loss": 0.243,
|
| 5329 |
+
"step": 720000
|
| 5330 |
+
},
|
| 5331 |
+
{
|
| 5332 |
+
"epoch": 10.99,
|
| 5333 |
+
"eval_runtime": 0.7755,
|
| 5334 |
+
"eval_samples_per_second": 1289.466,
|
| 5335 |
+
"eval_steps_per_second": 20.631,
|
| 5336 |
+
"step": 720000
|
| 5337 |
}
|
| 5338 |
],
|
| 5339 |
"max_steps": 1000000,
|
| 5340 |
"num_train_epochs": 16,
|
| 5341 |
+
"total_flos": 5.047212223503507e+22,
|
| 5342 |
"trial_name": null,
|
| 5343 |
"trial_params": null
|
| 5344 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1129389ce9bcf343c0f3cd3aed67df3cad67444a99046ba4741058e82d747211
|
| 3 |
size 449471589
|