Training in progress, step 1000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 737580392
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a424a2b0f4bba2668648fad242ba4bc01129a8dfff560cb28160583a32cc8795
|
| 3 |
size 737580392
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1475248442
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4147564c0aebe4adfc533256efb0c26f7c34d829d8cfd1faf86069796ae5e92b
|
| 3 |
size 1475248442
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1798148c8ddbef876c6eb294160aa711b50b4fbc294eeefa2c2edd714ea4965e
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 2.0,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -357,6 +357,356 @@
|
|
| 357 |
"learning_rate": 2.9707122774133083e-05,
|
| 358 |
"loss": 0.4085,
|
| 359 |
"step": 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
}
|
| 361 |
],
|
| 362 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.46860356138706655,
|
| 5 |
"eval_steps": 2.0,
|
| 6 |
+
"global_step": 1000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 357 |
"learning_rate": 2.9707122774133083e-05,
|
| 358 |
"loss": 0.4085,
|
| 359 |
"step": 500
|
| 360 |
+
},
|
| 361 |
+
{
|
| 362 |
+
"epoch": 0.23898781630740393,
|
| 363 |
+
"grad_norm": 1.9792900085449219,
|
| 364 |
+
"learning_rate": 2.970126522961575e-05,
|
| 365 |
+
"loss": 0.4074,
|
| 366 |
+
"step": 510
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"epoch": 0.2436738519212746,
|
| 370 |
+
"grad_norm": 2.3620975017547607,
|
| 371 |
+
"learning_rate": 2.9695407685098408e-05,
|
| 372 |
+
"loss": 0.4076,
|
| 373 |
+
"step": 520
|
| 374 |
+
},
|
| 375 |
+
{
|
| 376 |
+
"epoch": 0.24835988753514526,
|
| 377 |
+
"grad_norm": 1.7499839067459106,
|
| 378 |
+
"learning_rate": 2.968955014058107e-05,
|
| 379 |
+
"loss": 0.4033,
|
| 380 |
+
"step": 530
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"epoch": 0.2530459231490159,
|
| 384 |
+
"grad_norm": 2.367208480834961,
|
| 385 |
+
"learning_rate": 2.968369259606373e-05,
|
| 386 |
+
"loss": 0.4166,
|
| 387 |
+
"step": 540
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"epoch": 0.25773195876288657,
|
| 391 |
+
"grad_norm": 1.5346542596817017,
|
| 392 |
+
"learning_rate": 2.9677835051546395e-05,
|
| 393 |
+
"loss": 0.3523,
|
| 394 |
+
"step": 550
|
| 395 |
+
},
|
| 396 |
+
{
|
| 397 |
+
"epoch": 0.2624179943767573,
|
| 398 |
+
"grad_norm": 2.177263021469116,
|
| 399 |
+
"learning_rate": 2.9671977507029054e-05,
|
| 400 |
+
"loss": 0.379,
|
| 401 |
+
"step": 560
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"epoch": 0.26710402999062793,
|
| 405 |
+
"grad_norm": 1.6328988075256348,
|
| 406 |
+
"learning_rate": 2.9666119962511716e-05,
|
| 407 |
+
"loss": 0.3885,
|
| 408 |
+
"step": 570
|
| 409 |
+
},
|
| 410 |
+
{
|
| 411 |
+
"epoch": 0.2717900656044986,
|
| 412 |
+
"grad_norm": 2.2757279872894287,
|
| 413 |
+
"learning_rate": 2.9660262417994375e-05,
|
| 414 |
+
"loss": 0.3579,
|
| 415 |
+
"step": 580
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"epoch": 0.27647610121836924,
|
| 419 |
+
"grad_norm": 1.8637791872024536,
|
| 420 |
+
"learning_rate": 2.965440487347704e-05,
|
| 421 |
+
"loss": 0.4114,
|
| 422 |
+
"step": 590
|
| 423 |
+
},
|
| 424 |
+
{
|
| 425 |
+
"epoch": 0.28116213683223995,
|
| 426 |
+
"grad_norm": 1.9911636114120483,
|
| 427 |
+
"learning_rate": 2.96485473289597e-05,
|
| 428 |
+
"loss": 0.3793,
|
| 429 |
+
"step": 600
|
| 430 |
+
},
|
| 431 |
+
{
|
| 432 |
+
"epoch": 0.2858481724461106,
|
| 433 |
+
"grad_norm": 2.3302886486053467,
|
| 434 |
+
"learning_rate": 2.9642689784442362e-05,
|
| 435 |
+
"loss": 0.3858,
|
| 436 |
+
"step": 610
|
| 437 |
+
},
|
| 438 |
+
{
|
| 439 |
+
"epoch": 0.29053420805998126,
|
| 440 |
+
"grad_norm": 1.5424076318740845,
|
| 441 |
+
"learning_rate": 2.9636832239925025e-05,
|
| 442 |
+
"loss": 0.3759,
|
| 443 |
+
"step": 620
|
| 444 |
+
},
|
| 445 |
+
{
|
| 446 |
+
"epoch": 0.2952202436738519,
|
| 447 |
+
"grad_norm": 2.340257406234741,
|
| 448 |
+
"learning_rate": 2.9630974695407687e-05,
|
| 449 |
+
"loss": 0.3868,
|
| 450 |
+
"step": 630
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"epoch": 0.29990627928772257,
|
| 454 |
+
"grad_norm": 1.9387434720993042,
|
| 455 |
+
"learning_rate": 2.9625117150890346e-05,
|
| 456 |
+
"loss": 0.381,
|
| 457 |
+
"step": 640
|
| 458 |
+
},
|
| 459 |
+
{
|
| 460 |
+
"epoch": 0.3045923149015933,
|
| 461 |
+
"grad_norm": 1.7054346799850464,
|
| 462 |
+
"learning_rate": 2.961925960637301e-05,
|
| 463 |
+
"loss": 0.3469,
|
| 464 |
+
"step": 650
|
| 465 |
+
},
|
| 466 |
+
{
|
| 467 |
+
"epoch": 0.30927835051546393,
|
| 468 |
+
"grad_norm": 1.667858600616455,
|
| 469 |
+
"learning_rate": 2.961340206185567e-05,
|
| 470 |
+
"loss": 0.3419,
|
| 471 |
+
"step": 660
|
| 472 |
+
},
|
| 473 |
+
{
|
| 474 |
+
"epoch": 0.3139643861293346,
|
| 475 |
+
"grad_norm": 2.5740160942077637,
|
| 476 |
+
"learning_rate": 2.9607544517338333e-05,
|
| 477 |
+
"loss": 0.3887,
|
| 478 |
+
"step": 670
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"epoch": 0.31865042174320524,
|
| 482 |
+
"grad_norm": 1.893171787261963,
|
| 483 |
+
"learning_rate": 2.9601686972820992e-05,
|
| 484 |
+
"loss": 0.3661,
|
| 485 |
+
"step": 680
|
| 486 |
+
},
|
| 487 |
+
{
|
| 488 |
+
"epoch": 0.3233364573570759,
|
| 489 |
+
"grad_norm": 2.29376220703125,
|
| 490 |
+
"learning_rate": 2.9595829428303655e-05,
|
| 491 |
+
"loss": 0.408,
|
| 492 |
+
"step": 690
|
| 493 |
+
},
|
| 494 |
+
{
|
| 495 |
+
"epoch": 0.3280224929709466,
|
| 496 |
+
"grad_norm": 1.6235976219177246,
|
| 497 |
+
"learning_rate": 2.958997188378632e-05,
|
| 498 |
+
"loss": 0.3872,
|
| 499 |
+
"step": 700
|
| 500 |
+
},
|
| 501 |
+
{
|
| 502 |
+
"epoch": 0.33270852858481725,
|
| 503 |
+
"grad_norm": 1.8210023641586304,
|
| 504 |
+
"learning_rate": 2.958411433926898e-05,
|
| 505 |
+
"loss": 0.3276,
|
| 506 |
+
"step": 710
|
| 507 |
+
},
|
| 508 |
+
{
|
| 509 |
+
"epoch": 0.3373945641986879,
|
| 510 |
+
"grad_norm": 2.2085988521575928,
|
| 511 |
+
"learning_rate": 2.9578256794751642e-05,
|
| 512 |
+
"loss": 0.378,
|
| 513 |
+
"step": 720
|
| 514 |
+
},
|
| 515 |
+
{
|
| 516 |
+
"epoch": 0.34208059981255856,
|
| 517 |
+
"grad_norm": 1.9474214315414429,
|
| 518 |
+
"learning_rate": 2.95723992502343e-05,
|
| 519 |
+
"loss": 0.3429,
|
| 520 |
+
"step": 730
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"epoch": 0.3467666354264292,
|
| 524 |
+
"grad_norm": 1.4176770448684692,
|
| 525 |
+
"learning_rate": 2.9566541705716967e-05,
|
| 526 |
+
"loss": 0.3686,
|
| 527 |
+
"step": 740
|
| 528 |
+
},
|
| 529 |
+
{
|
| 530 |
+
"epoch": 0.3514526710402999,
|
| 531 |
+
"grad_norm": 1.9044578075408936,
|
| 532 |
+
"learning_rate": 2.9560684161199626e-05,
|
| 533 |
+
"loss": 0.3359,
|
| 534 |
+
"step": 750
|
| 535 |
+
},
|
| 536 |
+
{
|
| 537 |
+
"epoch": 0.3561387066541706,
|
| 538 |
+
"grad_norm": 2.1233558654785156,
|
| 539 |
+
"learning_rate": 2.9554826616682288e-05,
|
| 540 |
+
"loss": 0.3344,
|
| 541 |
+
"step": 760
|
| 542 |
+
},
|
| 543 |
+
{
|
| 544 |
+
"epoch": 0.36082474226804123,
|
| 545 |
+
"grad_norm": 2.1498265266418457,
|
| 546 |
+
"learning_rate": 2.954896907216495e-05,
|
| 547 |
+
"loss": 0.3844,
|
| 548 |
+
"step": 770
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"epoch": 0.3655107778819119,
|
| 552 |
+
"grad_norm": 1.8201932907104492,
|
| 553 |
+
"learning_rate": 2.9543111527647613e-05,
|
| 554 |
+
"loss": 0.3395,
|
| 555 |
+
"step": 780
|
| 556 |
+
},
|
| 557 |
+
{
|
| 558 |
+
"epoch": 0.3701968134957826,
|
| 559 |
+
"grad_norm": 1.6369259357452393,
|
| 560 |
+
"learning_rate": 2.953725398313027e-05,
|
| 561 |
+
"loss": 0.3046,
|
| 562 |
+
"step": 790
|
| 563 |
+
},
|
| 564 |
+
{
|
| 565 |
+
"epoch": 0.37488284910965325,
|
| 566 |
+
"grad_norm": 1.9067765474319458,
|
| 567 |
+
"learning_rate": 2.9531396438612934e-05,
|
| 568 |
+
"loss": 0.3411,
|
| 569 |
+
"step": 800
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"epoch": 0.3795688847235239,
|
| 573 |
+
"grad_norm": 1.7853655815124512,
|
| 574 |
+
"learning_rate": 2.9525538894095596e-05,
|
| 575 |
+
"loss": 0.3942,
|
| 576 |
+
"step": 810
|
| 577 |
+
},
|
| 578 |
+
{
|
| 579 |
+
"epoch": 0.38425492033739456,
|
| 580 |
+
"grad_norm": 1.6428436040878296,
|
| 581 |
+
"learning_rate": 2.951968134957826e-05,
|
| 582 |
+
"loss": 0.3446,
|
| 583 |
+
"step": 820
|
| 584 |
+
},
|
| 585 |
+
{
|
| 586 |
+
"epoch": 0.3889409559512652,
|
| 587 |
+
"grad_norm": 1.761293649673462,
|
| 588 |
+
"learning_rate": 2.9513823805060918e-05,
|
| 589 |
+
"loss": 0.3368,
|
| 590 |
+
"step": 830
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"epoch": 0.3936269915651359,
|
| 594 |
+
"grad_norm": 1.8849061727523804,
|
| 595 |
+
"learning_rate": 2.950796626054358e-05,
|
| 596 |
+
"loss": 0.3595,
|
| 597 |
+
"step": 840
|
| 598 |
+
},
|
| 599 |
+
{
|
| 600 |
+
"epoch": 0.3983130271790066,
|
| 601 |
+
"grad_norm": 1.6948515176773071,
|
| 602 |
+
"learning_rate": 2.9502108716026243e-05,
|
| 603 |
+
"loss": 0.3225,
|
| 604 |
+
"step": 850
|
| 605 |
+
},
|
| 606 |
+
{
|
| 607 |
+
"epoch": 0.4029990627928772,
|
| 608 |
+
"grad_norm": 1.8712960481643677,
|
| 609 |
+
"learning_rate": 2.9496251171508905e-05,
|
| 610 |
+
"loss": 0.3528,
|
| 611 |
+
"step": 860
|
| 612 |
+
},
|
| 613 |
+
{
|
| 614 |
+
"epoch": 0.4076850984067479,
|
| 615 |
+
"grad_norm": 2.2388830184936523,
|
| 616 |
+
"learning_rate": 2.9490393626991564e-05,
|
| 617 |
+
"loss": 0.3981,
|
| 618 |
+
"step": 870
|
| 619 |
+
},
|
| 620 |
+
{
|
| 621 |
+
"epoch": 0.41237113402061853,
|
| 622 |
+
"grad_norm": 1.8384082317352295,
|
| 623 |
+
"learning_rate": 2.9484536082474226e-05,
|
| 624 |
+
"loss": 0.3575,
|
| 625 |
+
"step": 880
|
| 626 |
+
},
|
| 627 |
+
{
|
| 628 |
+
"epoch": 0.41705716963448924,
|
| 629 |
+
"grad_norm": 2.068216323852539,
|
| 630 |
+
"learning_rate": 2.947867853795689e-05,
|
| 631 |
+
"loss": 0.3448,
|
| 632 |
+
"step": 890
|
| 633 |
+
},
|
| 634 |
+
{
|
| 635 |
+
"epoch": 0.4217432052483599,
|
| 636 |
+
"grad_norm": 1.5086658000946045,
|
| 637 |
+
"learning_rate": 2.947282099343955e-05,
|
| 638 |
+
"loss": 0.3214,
|
| 639 |
+
"step": 900
|
| 640 |
+
},
|
| 641 |
+
{
|
| 642 |
+
"epoch": 0.42642924086223055,
|
| 643 |
+
"grad_norm": 1.5989112854003906,
|
| 644 |
+
"learning_rate": 2.946696344892221e-05,
|
| 645 |
+
"loss": 0.2719,
|
| 646 |
+
"step": 910
|
| 647 |
+
},
|
| 648 |
+
{
|
| 649 |
+
"epoch": 0.4311152764761012,
|
| 650 |
+
"grad_norm": 2.223735809326172,
|
| 651 |
+
"learning_rate": 2.9461105904404876e-05,
|
| 652 |
+
"loss": 0.3691,
|
| 653 |
+
"step": 920
|
| 654 |
+
},
|
| 655 |
+
{
|
| 656 |
+
"epoch": 0.43580131208997186,
|
| 657 |
+
"grad_norm": 1.928982138633728,
|
| 658 |
+
"learning_rate": 2.9455248359887535e-05,
|
| 659 |
+
"loss": 0.3446,
|
| 660 |
+
"step": 930
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"epoch": 0.44048734770384257,
|
| 664 |
+
"grad_norm": 1.7856864929199219,
|
| 665 |
+
"learning_rate": 2.9449390815370197e-05,
|
| 666 |
+
"loss": 0.2984,
|
| 667 |
+
"step": 940
|
| 668 |
+
},
|
| 669 |
+
{
|
| 670 |
+
"epoch": 0.4451733833177132,
|
| 671 |
+
"grad_norm": 2.044851779937744,
|
| 672 |
+
"learning_rate": 2.944353327085286e-05,
|
| 673 |
+
"loss": 0.3396,
|
| 674 |
+
"step": 950
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"epoch": 0.4498594189315839,
|
| 678 |
+
"grad_norm": 1.5885918140411377,
|
| 679 |
+
"learning_rate": 2.9437675726335522e-05,
|
| 680 |
+
"loss": 0.3529,
|
| 681 |
+
"step": 960
|
| 682 |
+
},
|
| 683 |
+
{
|
| 684 |
+
"epoch": 0.45454545454545453,
|
| 685 |
+
"grad_norm": 2.08933162689209,
|
| 686 |
+
"learning_rate": 2.9431818181818184e-05,
|
| 687 |
+
"loss": 0.3561,
|
| 688 |
+
"step": 970
|
| 689 |
+
},
|
| 690 |
+
{
|
| 691 |
+
"epoch": 0.4592314901593252,
|
| 692 |
+
"grad_norm": 1.5980266332626343,
|
| 693 |
+
"learning_rate": 2.9425960637300843e-05,
|
| 694 |
+
"loss": 0.3512,
|
| 695 |
+
"step": 980
|
| 696 |
+
},
|
| 697 |
+
{
|
| 698 |
+
"epoch": 0.4639175257731959,
|
| 699 |
+
"grad_norm": 1.570770025253296,
|
| 700 |
+
"learning_rate": 2.9420103092783506e-05,
|
| 701 |
+
"loss": 0.3678,
|
| 702 |
+
"step": 990
|
| 703 |
+
},
|
| 704 |
+
{
|
| 705 |
+
"epoch": 0.46860356138706655,
|
| 706 |
+
"grad_norm": 1.8862972259521484,
|
| 707 |
+
"learning_rate": 2.9414245548266168e-05,
|
| 708 |
+
"loss": 0.3148,
|
| 709 |
+
"step": 1000
|
| 710 |
}
|
| 711 |
],
|
| 712 |
"logging_steps": 10,
|