Training in progress, step 31000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 563074920
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5bbb33796637d85d181dd86914f0d0b2932daf04a02e2d42b0e675ffd28388a
|
| 3 |
size 563074920
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1125916346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:404568263d026535dcc44bb135fbb61c7e3760b5962c18e72f460d9b5076b3b1
|
| 3 |
size 1125916346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f9232b4b974a65603075b06bb82ca61a1267905abb281ba5363cf0b0ac176db
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0aa4aa16ed53784eb6010613ed4115c7bfda9657643c7abb9d4d9e40642eb9e
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5348,11 +5348,189 @@
|
|
| 5348 |
"eval_steps_per_second": 8.76,
|
| 5349 |
"num_input_tokens_seen": 7864320000,
|
| 5350 |
"step": 30000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5351 |
}
|
| 5352 |
],
|
| 5353 |
"logging_steps": 50,
|
| 5354 |
-
"max_steps":
|
| 5355 |
-
"num_input_tokens_seen":
|
| 5356 |
"num_train_epochs": 1,
|
| 5357 |
"save_steps": 1000,
|
| 5358 |
"stateful_callbacks": {
|
|
@@ -5362,12 +5540,12 @@
|
|
| 5362 |
"should_evaluate": false,
|
| 5363 |
"should_log": false,
|
| 5364 |
"should_save": true,
|
| 5365 |
-
"should_training_stop":
|
| 5366 |
},
|
| 5367 |
"attributes": {}
|
| 5368 |
}
|
| 5369 |
},
|
| 5370 |
-
"total_flos": 5.
|
| 5371 |
"train_batch_size": 64,
|
| 5372 |
"trial_name": null,
|
| 5373 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.20852146509871977,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 31000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5348 |
"eval_steps_per_second": 8.76,
|
| 5349 |
"num_input_tokens_seen": 7864320000,
|
| 5350 |
"step": 30000
|
| 5351 |
+
},
|
| 5352 |
+
{
|
| 5353 |
+
"epoch": 0.20213129116827513,
|
| 5354 |
+
"grad_norm": 0.7275823950767517,
|
| 5355 |
+
"learning_rate": 0.0002881031482247361,
|
| 5356 |
+
"loss": 12.0089,
|
| 5357 |
+
"num_input_tokens_seen": 7877427200,
|
| 5358 |
+
"step": 30050
|
| 5359 |
+
},
|
| 5360 |
+
{
|
| 5361 |
+
"epoch": 0.20246761611198275,
|
| 5362 |
+
"grad_norm": 0.7593051195144653,
|
| 5363 |
+
"learning_rate": 0.0002904816199505797,
|
| 5364 |
+
"loss": 12.0389,
|
| 5365 |
+
"num_input_tokens_seen": 7890534400,
|
| 5366 |
+
"step": 30100
|
| 5367 |
+
},
|
| 5368 |
+
{
|
| 5369 |
+
"epoch": 0.20280394105569036,
|
| 5370 |
+
"grad_norm": 0.7933290004730225,
|
| 5371 |
+
"learning_rate": 0.00029286069073616763,
|
| 5372 |
+
"loss": 12.0537,
|
| 5373 |
+
"num_input_tokens_seen": 7903641600,
|
| 5374 |
+
"step": 30150
|
| 5375 |
+
},
|
| 5376 |
+
{
|
| 5377 |
+
"epoch": 0.20314026599939797,
|
| 5378 |
+
"grad_norm": 0.736951470375061,
|
| 5379 |
+
"learning_rate": 0.0002952402108495577,
|
| 5380 |
+
"loss": 12.0687,
|
| 5381 |
+
"num_input_tokens_seen": 7916748800,
|
| 5382 |
+
"step": 30200
|
| 5383 |
+
},
|
| 5384 |
+
{
|
| 5385 |
+
"epoch": 0.20347659094310558,
|
| 5386 |
+
"grad_norm": 0.7448037266731262,
|
| 5387 |
+
"learning_rate": 0.0002976200305305268,
|
| 5388 |
+
"loss": 12.0549,
|
| 5389 |
+
"num_input_tokens_seen": 7929856000,
|
| 5390 |
+
"step": 30250
|
| 5391 |
+
},
|
| 5392 |
+
{
|
| 5393 |
+
"epoch": 0.2038129158868132,
|
| 5394 |
+
"grad_norm": 0.7063918113708496,
|
| 5395 |
+
"learning_rate": 0.0002999999999999999,
|
| 5396 |
+
"loss": 12.0769,
|
| 5397 |
+
"num_input_tokens_seen": 7942963200,
|
| 5398 |
+
"step": 30300
|
| 5399 |
+
},
|
| 5400 |
+
{
|
| 5401 |
+
"epoch": 0.20414924083052083,
|
| 5402 |
+
"grad_norm": 0.7379609942436218,
|
| 5403 |
+
"learning_rate": 0.000302379969469473,
|
| 5404 |
+
"loss": 12.1145,
|
| 5405 |
+
"num_input_tokens_seen": 7956070400,
|
| 5406 |
+
"step": 30350
|
| 5407 |
+
},
|
| 5408 |
+
{
|
| 5409 |
+
"epoch": 0.20448556577422844,
|
| 5410 |
+
"grad_norm": 0.7159172892570496,
|
| 5411 |
+
"learning_rate": 0.0003047597891504424,
|
| 5412 |
+
"loss": 12.1304,
|
| 5413 |
+
"num_input_tokens_seen": 7969177600,
|
| 5414 |
+
"step": 30400
|
| 5415 |
+
},
|
| 5416 |
+
{
|
| 5417 |
+
"epoch": 0.20482189071793605,
|
| 5418 |
+
"grad_norm": 0.759340226650238,
|
| 5419 |
+
"learning_rate": 0.00030713930926383194,
|
| 5420 |
+
"loss": 12.1011,
|
| 5421 |
+
"num_input_tokens_seen": 7982284800,
|
| 5422 |
+
"step": 30450
|
| 5423 |
+
},
|
| 5424 |
+
{
|
| 5425 |
+
"epoch": 0.20515821566164366,
|
| 5426 |
+
"grad_norm": 0.782768189907074,
|
| 5427 |
+
"learning_rate": 0.00030951838004942016,
|
| 5428 |
+
"loss": 12.1276,
|
| 5429 |
+
"num_input_tokens_seen": 7995392000,
|
| 5430 |
+
"step": 30500
|
| 5431 |
+
},
|
| 5432 |
+
{
|
| 5433 |
+
"epoch": 0.20515821566164366,
|
| 5434 |
+
"eval_loss": 2.9330999851226807,
|
| 5435 |
+
"eval_runtime": 143.3174,
|
| 5436 |
+
"eval_samples_per_second": 34.888,
|
| 5437 |
+
"eval_steps_per_second": 8.722,
|
| 5438 |
+
"num_input_tokens_seen": 7995392000,
|
| 5439 |
+
"step": 30500
|
| 5440 |
+
},
|
| 5441 |
+
{
|
| 5442 |
+
"epoch": 0.20549454060535127,
|
| 5443 |
+
"grad_norm": 0.7521361112594604,
|
| 5444 |
+
"learning_rate": 0.00031189685177526375,
|
| 5445 |
+
"loss": 12.1475,
|
| 5446 |
+
"num_input_tokens_seen": 8008499200,
|
| 5447 |
+
"step": 30550
|
| 5448 |
+
},
|
| 5449 |
+
{
|
| 5450 |
+
"epoch": 0.20583086554905888,
|
| 5451 |
+
"grad_norm": 0.752306342124939,
|
| 5452 |
+
"learning_rate": 0.00031427457474712264,
|
| 5453 |
+
"loss": 12.0914,
|
| 5454 |
+
"num_input_tokens_seen": 8021606400,
|
| 5455 |
+
"step": 30600
|
| 5456 |
+
},
|
| 5457 |
+
{
|
| 5458 |
+
"epoch": 0.2061671904927665,
|
| 5459 |
+
"grad_norm": 0.6963069438934326,
|
| 5460 |
+
"learning_rate": 0.0003166513993178817,
|
| 5461 |
+
"loss": 12.1272,
|
| 5462 |
+
"num_input_tokens_seen": 8034713600,
|
| 5463 |
+
"step": 30650
|
| 5464 |
+
},
|
| 5465 |
+
{
|
| 5466 |
+
"epoch": 0.2065035154364741,
|
| 5467 |
+
"grad_norm": 0.7007436752319336,
|
| 5468 |
+
"learning_rate": 0.0003190271758969692,
|
| 5469 |
+
"loss": 12.1085,
|
| 5470 |
+
"num_input_tokens_seen": 8047820800,
|
| 5471 |
+
"step": 30700
|
| 5472 |
+
},
|
| 5473 |
+
{
|
| 5474 |
+
"epoch": 0.20683984038018172,
|
| 5475 |
+
"grad_norm": 0.7034767270088196,
|
| 5476 |
+
"learning_rate": 0.00032140175495976947,
|
| 5477 |
+
"loss": 12.1114,
|
| 5478 |
+
"num_input_tokens_seen": 8060928000,
|
| 5479 |
+
"step": 30750
|
| 5480 |
+
},
|
| 5481 |
+
{
|
| 5482 |
+
"epoch": 0.20717616532388933,
|
| 5483 |
+
"grad_norm": 0.7317435145378113,
|
| 5484 |
+
"learning_rate": 0.0003237749870570365,
|
| 5485 |
+
"loss": 12.0728,
|
| 5486 |
+
"num_input_tokens_seen": 8074035200,
|
| 5487 |
+
"step": 30800
|
| 5488 |
+
},
|
| 5489 |
+
{
|
| 5490 |
+
"epoch": 0.20751249026759694,
|
| 5491 |
+
"grad_norm": 0.665651261806488,
|
| 5492 |
+
"learning_rate": 0.0003261467228242976,
|
| 5493 |
+
"loss": 12.1099,
|
| 5494 |
+
"num_input_tokens_seen": 8087142400,
|
| 5495 |
+
"step": 30850
|
| 5496 |
+
},
|
| 5497 |
+
{
|
| 5498 |
+
"epoch": 0.20784881521130455,
|
| 5499 |
+
"grad_norm": 0.7023760080337524,
|
| 5500 |
+
"learning_rate": 0.0003285168129912546,
|
| 5501 |
+
"loss": 12.1188,
|
| 5502 |
+
"num_input_tokens_seen": 8100249600,
|
| 5503 |
+
"step": 30900
|
| 5504 |
+
},
|
| 5505 |
+
{
|
| 5506 |
+
"epoch": 0.20818514015501216,
|
| 5507 |
+
"grad_norm": 0.7026780247688293,
|
| 5508 |
+
"learning_rate": 0.00033088510839118004,
|
| 5509 |
+
"loss": 12.0884,
|
| 5510 |
+
"num_input_tokens_seen": 8113356800,
|
| 5511 |
+
"step": 30950
|
| 5512 |
+
},
|
| 5513 |
+
{
|
| 5514 |
+
"epoch": 0.20852146509871977,
|
| 5515 |
+
"grad_norm": 0.7397706508636475,
|
| 5516 |
+
"learning_rate": 0.00033325145997030323,
|
| 5517 |
+
"loss": 12.0894,
|
| 5518 |
+
"num_input_tokens_seen": 8126464000,
|
| 5519 |
+
"step": 31000
|
| 5520 |
+
},
|
| 5521 |
+
{
|
| 5522 |
+
"epoch": 0.20852146509871977,
|
| 5523 |
+
"eval_loss": 2.9383528232574463,
|
| 5524 |
+
"eval_runtime": 144.6078,
|
| 5525 |
+
"eval_samples_per_second": 34.576,
|
| 5526 |
+
"eval_steps_per_second": 8.644,
|
| 5527 |
+
"num_input_tokens_seen": 8126464000,
|
| 5528 |
+
"step": 31000
|
| 5529 |
}
|
| 5530 |
],
|
| 5531 |
"logging_steps": 50,
|
| 5532 |
+
"max_steps": 60000,
|
| 5533 |
+
"num_input_tokens_seen": 8126464000,
|
| 5534 |
"num_train_epochs": 1,
|
| 5535 |
"save_steps": 1000,
|
| 5536 |
"stateful_callbacks": {
|
|
|
|
| 5540 |
"should_evaluate": false,
|
| 5541 |
"should_log": false,
|
| 5542 |
"should_save": true,
|
| 5543 |
+
"should_training_stop": false
|
| 5544 |
},
|
| 5545 |
"attributes": {}
|
| 5546 |
}
|
| 5547 |
},
|
| 5548 |
+
"total_flos": 5.17832328609792e+18,
|
| 5549 |
"train_batch_size": 64,
|
| 5550 |
"trial_name": null,
|
| 5551 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37300a576f29a5a8ddf81ea75e13d6c1ee5bf582f11fc6860569d8fcc97499d1
|
| 3 |
+
size 6008
|