Training in progress, epoch 3, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1980860410
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3185710f9ed48293a778de595fcd38098844825122d64b9829b1b93f6412f403
|
| 3 |
size 1980860410
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 990409330
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6d9360daf7120c5d39d8ef5625d204b3b6837a8189e798928f762e83a610468
|
| 3 |
size 990409330
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbddb6a0f6f63e14b35b515dcd6478e86f1cce79a693afe0ccaec7cdbe6f4fcc
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:727027b6d6031aa8f34a43940937c6f906ae36c7156fab65320547a0006b7c81
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.835176944732666,
|
| 3 |
"best_model_checkpoint": "dq158/morbius/checkpoint-9568",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -273,13 +273,146 @@
|
|
| 273 |
"eval_steps_per_second": 1.314,
|
| 274 |
"eval_translation_length": 104272,
|
| 275 |
"step": 19136
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
}
|
| 277 |
],
|
| 278 |
"logging_steps": 500,
|
| 279 |
"max_steps": 47840,
|
| 280 |
"num_train_epochs": 5,
|
| 281 |
"save_steps": 500,
|
| 282 |
-
"total_flos":
|
| 283 |
"trial_name": null,
|
| 284 |
"trial_params": null
|
| 285 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.835176944732666,
|
| 3 |
"best_model_checkpoint": "dq158/morbius/checkpoint-9568",
|
| 4 |
+
"epoch": 3.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 28704,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 273 |
"eval_steps_per_second": 1.314,
|
| 274 |
"eval_translation_length": 104272,
|
| 275 |
"step": 19136
|
| 276 |
+
},
|
| 277 |
+
{
|
| 278 |
+
"epoch": 2.04,
|
| 279 |
+
"learning_rate": 2.9619565217391305e-05,
|
| 280 |
+
"loss": 1.9566,
|
| 281 |
+
"step": 19500
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"epoch": 2.09,
|
| 285 |
+
"learning_rate": 2.9096989966555184e-05,
|
| 286 |
+
"loss": 1.9582,
|
| 287 |
+
"step": 20000
|
| 288 |
+
},
|
| 289 |
+
{
|
| 290 |
+
"epoch": 2.14,
|
| 291 |
+
"learning_rate": 2.8574414715719066e-05,
|
| 292 |
+
"loss": 1.9681,
|
| 293 |
+
"step": 20500
|
| 294 |
+
},
|
| 295 |
+
{
|
| 296 |
+
"epoch": 2.19,
|
| 297 |
+
"learning_rate": 2.8051839464882945e-05,
|
| 298 |
+
"loss": 1.9374,
|
| 299 |
+
"step": 21000
|
| 300 |
+
},
|
| 301 |
+
{
|
| 302 |
+
"epoch": 2.25,
|
| 303 |
+
"learning_rate": 2.7529264214046824e-05,
|
| 304 |
+
"loss": 1.9412,
|
| 305 |
+
"step": 21500
|
| 306 |
+
},
|
| 307 |
+
{
|
| 308 |
+
"epoch": 2.3,
|
| 309 |
+
"learning_rate": 2.7006688963210703e-05,
|
| 310 |
+
"loss": 1.9557,
|
| 311 |
+
"step": 22000
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"epoch": 2.35,
|
| 315 |
+
"learning_rate": 2.6484113712374582e-05,
|
| 316 |
+
"loss": 1.9684,
|
| 317 |
+
"step": 22500
|
| 318 |
+
},
|
| 319 |
+
{
|
| 320 |
+
"epoch": 2.4,
|
| 321 |
+
"learning_rate": 2.5961538461538464e-05,
|
| 322 |
+
"loss": 1.9307,
|
| 323 |
+
"step": 23000
|
| 324 |
+
},
|
| 325 |
+
{
|
| 326 |
+
"epoch": 2.46,
|
| 327 |
+
"learning_rate": 2.5438963210702343e-05,
|
| 328 |
+
"loss": 1.9237,
|
| 329 |
+
"step": 23500
|
| 330 |
+
},
|
| 331 |
+
{
|
| 332 |
+
"epoch": 2.51,
|
| 333 |
+
"learning_rate": 2.491638795986622e-05,
|
| 334 |
+
"loss": 1.9608,
|
| 335 |
+
"step": 24000
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"epoch": 2.56,
|
| 339 |
+
"learning_rate": 2.43938127090301e-05,
|
| 340 |
+
"loss": 1.9549,
|
| 341 |
+
"step": 24500
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"epoch": 2.61,
|
| 345 |
+
"learning_rate": 2.3871237458193983e-05,
|
| 346 |
+
"loss": 1.9211,
|
| 347 |
+
"step": 25000
|
| 348 |
+
},
|
| 349 |
+
{
|
| 350 |
+
"epoch": 2.67,
|
| 351 |
+
"learning_rate": 2.334866220735786e-05,
|
| 352 |
+
"loss": 1.9698,
|
| 353 |
+
"step": 25500
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"epoch": 2.72,
|
| 357 |
+
"learning_rate": 2.282608695652174e-05,
|
| 358 |
+
"loss": 1.9413,
|
| 359 |
+
"step": 26000
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 2.77,
|
| 363 |
+
"learning_rate": 2.230351170568562e-05,
|
| 364 |
+
"loss": 1.9943,
|
| 365 |
+
"step": 26500
|
| 366 |
+
},
|
| 367 |
+
{
|
| 368 |
+
"epoch": 2.82,
|
| 369 |
+
"learning_rate": 2.1780936454849498e-05,
|
| 370 |
+
"loss": 1.938,
|
| 371 |
+
"step": 27000
|
| 372 |
+
},
|
| 373 |
+
{
|
| 374 |
+
"epoch": 2.87,
|
| 375 |
+
"learning_rate": 2.125836120401338e-05,
|
| 376 |
+
"loss": 1.987,
|
| 377 |
+
"step": 27500
|
| 378 |
+
},
|
| 379 |
+
{
|
| 380 |
+
"epoch": 2.93,
|
| 381 |
+
"learning_rate": 2.073578595317726e-05,
|
| 382 |
+
"loss": 1.9455,
|
| 383 |
+
"step": 28000
|
| 384 |
+
},
|
| 385 |
+
{
|
| 386 |
+
"epoch": 2.98,
|
| 387 |
+
"learning_rate": 2.0213210702341138e-05,
|
| 388 |
+
"loss": 1.9788,
|
| 389 |
+
"step": 28500
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"epoch": 3.0,
|
| 393 |
+
"eval_bleu": 0.08712036412034174,
|
| 394 |
+
"eval_brevity_penalty": 0.7810596870491452,
|
| 395 |
+
"eval_length_ratio": 0.8018579316519923,
|
| 396 |
+
"eval_loss": 1.8364616632461548,
|
| 397 |
+
"eval_precisions": [
|
| 398 |
+
0.1853626762201647,
|
| 399 |
+
0.1156396502935338,
|
| 400 |
+
0.08657517535834096,
|
| 401 |
+
0.08341024457775727
|
| 402 |
+
],
|
| 403 |
+
"eval_reference_length": 134020,
|
| 404 |
+
"eval_runtime": 809.3386,
|
| 405 |
+
"eval_samples_per_second": 15.762,
|
| 406 |
+
"eval_steps_per_second": 1.315,
|
| 407 |
+
"eval_translation_length": 107465,
|
| 408 |
+
"step": 28704
|
| 409 |
}
|
| 410 |
],
|
| 411 |
"logging_steps": 500,
|
| 412 |
"max_steps": 47840,
|
| 413 |
"num_train_epochs": 5,
|
| 414 |
"save_steps": 500,
|
| 415 |
+
"total_flos": 2.3584275781543526e+17,
|
| 416 |
"trial_name": null,
|
| 417 |
"trial_params": null
|
| 418 |
}
|