ljcamargo commited on
Commit
72e0005
·
verified ·
1 Parent(s): e4480a8

Training in progress, step 900, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ef549acb7bb3a26b9a1d8c83faca397de8618a2dce8c81bde8e287f33fb6c31
3
  size 2558403928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1935637205cb627b948fe1329a80486b1da1feb7f14f8a0e15acab010a97b90c
3
  size 2558403928
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f91d2444da719f4454789524b172c58bc341e905a9b460651c04a077f667609
3
- size 1313044361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:575872657bd8f5c69c8e9a049519a8b9a0d9795ca6890003a302f811f9d4108a
3
+ size 1313638993
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0d5dfce4350324a9dd27602ce6d66bb933782beacd43e5d1fc128755bd9060e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d7cb8df90bbc1a1f334913d48d210d3a9a45cf39cb2aba7ed6759fa8b44c3a
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb7fde5111803012042c93a73aa191336bb6e10b3ad44f6bd1d94fc7008a22b6
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0b4230f34cfc1b81dc2c15ef8d265bdd348193f5a746ca2018df11549c7ac0
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27384781b4bab02662f6aa01507d1435cf787b396a01371737e0e695f3099df9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46dbc8a28dada13dfcd70ea962672a500c66aa01dc461c5d292f261a3ca3d0fc
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.48,
6
  "eval_steps": 500,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -435,6 +435,216 @@
435
  "learning_rate": 0.00017966673056530686,
436
  "loss": 7.3859,
437
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  }
439
  ],
440
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.72,
6
  "eval_steps": 500,
7
+ "global_step": 900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
435
  "learning_rate": 0.00017966673056530686,
436
  "loss": 7.3859,
437
  "step": 600
438
+ },
439
+ {
440
+ "epoch": 0.488,
441
+ "grad_norm": 7.625977516174316,
442
+ "learning_rate": 0.00017886740298196863,
443
+ "loss": 7.0858,
444
+ "step": 610
445
+ },
446
+ {
447
+ "epoch": 0.496,
448
+ "grad_norm": 5.039551734924316,
449
+ "learning_rate": 0.00017805451671784516,
450
+ "loss": 7.4888,
451
+ "step": 620
452
+ },
453
+ {
454
+ "epoch": 0.504,
455
+ "grad_norm": 5.905925273895264,
456
+ "learning_rate": 0.00017722821152224924,
457
+ "loss": 7.2393,
458
+ "step": 630
459
+ },
460
+ {
461
+ "epoch": 0.512,
462
+ "grad_norm": 9.78266716003418,
463
+ "learning_rate": 0.00017638862945144182,
464
+ "loss": 7.4102,
465
+ "step": 640
466
+ },
467
+ {
468
+ "epoch": 0.52,
469
+ "grad_norm": 6.097099781036377,
470
+ "learning_rate": 0.00017553591484421004,
471
+ "loss": 7.297,
472
+ "step": 650
473
+ },
474
+ {
475
+ "epoch": 0.528,
476
+ "grad_norm": 4.922943115234375,
477
+ "learning_rate": 0.00017467021429705285,
478
+ "loss": 7.3741,
479
+ "step": 660
480
+ },
481
+ {
482
+ "epoch": 0.536,
483
+ "grad_norm": 5.79689884185791,
484
+ "learning_rate": 0.00017379167663897856,
485
+ "loss": 7.1571,
486
+ "step": 670
487
+ },
488
+ {
489
+ "epoch": 0.544,
490
+ "grad_norm": 3.795285701751709,
491
+ "learning_rate": 0.00017290045290591858,
492
+ "loss": 6.8519,
493
+ "step": 680
494
+ },
495
+ {
496
+ "epoch": 0.552,
497
+ "grad_norm": 4.697098731994629,
498
+ "learning_rate": 0.0001719966963147616,
499
+ "loss": 7.111,
500
+ "step": 690
501
+ },
502
+ {
503
+ "epoch": 0.56,
504
+ "grad_norm": 4.46597146987915,
505
+ "learning_rate": 0.000171080562237013,
506
+ "loss": 7.0099,
507
+ "step": 700
508
+ },
509
+ {
510
+ "epoch": 0.568,
511
+ "grad_norm": 6.007630348205566,
512
+ "learning_rate": 0.00017015220817208376,
513
+ "loss": 7.326,
514
+ "step": 710
515
+ },
516
+ {
517
+ "epoch": 0.576,
518
+ "grad_norm": 4.53595495223999,
519
+ "learning_rate": 0.0001692117937202136,
520
+ "loss": 7.0785,
521
+ "step": 720
522
+ },
523
+ {
524
+ "epoch": 0.584,
525
+ "grad_norm": 6.850202560424805,
526
+ "learning_rate": 0.00016825948055503294,
527
+ "loss": 7.2174,
528
+ "step": 730
529
+ },
530
+ {
531
+ "epoch": 0.592,
532
+ "grad_norm": 5.931501865386963,
533
+ "learning_rate": 0.00016729543239576828,
534
+ "loss": 6.995,
535
+ "step": 740
536
+ },
537
+ {
538
+ "epoch": 0.6,
539
+ "grad_norm": 5.900391101837158,
540
+ "learning_rate": 0.0001663198149790961,
541
+ "loss": 6.8995,
542
+ "step": 750
543
+ },
544
+ {
545
+ "epoch": 0.608,
546
+ "grad_norm": 5.841864109039307,
547
+ "learning_rate": 0.00016533279603064978,
548
+ "loss": 6.7632,
549
+ "step": 760
550
+ },
551
+ {
552
+ "epoch": 0.616,
553
+ "grad_norm": 5.902273178100586,
554
+ "learning_rate": 0.00016433454523618482,
555
+ "loss": 6.9055,
556
+ "step": 770
557
+ },
558
+ {
559
+ "epoch": 0.624,
560
+ "grad_norm": 5.881319046020508,
561
+ "learning_rate": 0.00016332523421240658,
562
+ "loss": 6.686,
563
+ "step": 780
564
+ },
565
+ {
566
+ "epoch": 0.632,
567
+ "grad_norm": 5.178507328033447,
568
+ "learning_rate": 0.00016230503647746657,
569
+ "loss": 6.6281,
570
+ "step": 790
571
+ },
572
+ {
573
+ "epoch": 0.64,
574
+ "grad_norm": 4.807728290557861,
575
+ "learning_rate": 0.00016127412742113185,
576
+ "loss": 6.9373,
577
+ "step": 800
578
+ },
579
+ {
580
+ "epoch": 0.648,
581
+ "grad_norm": 5.6972975730896,
582
+ "learning_rate": 0.000160232684274632,
583
+ "loss": 6.7103,
584
+ "step": 810
585
+ },
586
+ {
587
+ "epoch": 0.656,
588
+ "grad_norm": 5.271605968475342,
589
+ "learning_rate": 0.00015918088608019043,
590
+ "loss": 6.9209,
591
+ "step": 820
592
+ },
593
+ {
594
+ "epoch": 0.664,
595
+ "grad_norm": 5.366481304168701,
596
+ "learning_rate": 0.00015811891366024358,
597
+ "loss": 6.9491,
598
+ "step": 830
599
+ },
600
+ {
601
+ "epoch": 0.672,
602
+ "grad_norm": 4.8482866287231445,
603
+ "learning_rate": 0.00015704694958635468,
604
+ "loss": 6.6238,
605
+ "step": 840
606
+ },
607
+ {
608
+ "epoch": 0.68,
609
+ "grad_norm": 7.680044651031494,
610
+ "learning_rate": 0.0001559651781478263,
611
+ "loss": 6.7933,
612
+ "step": 850
613
+ },
614
+ {
615
+ "epoch": 0.688,
616
+ "grad_norm": 4.3361496925354,
617
+ "learning_rate": 0.00015487378532001782,
618
+ "loss": 6.6105,
619
+ "step": 860
620
+ },
621
+ {
622
+ "epoch": 0.696,
623
+ "grad_norm": 8.266775131225586,
624
+ "learning_rate": 0.0001537729587323732,
625
+ "loss": 6.5274,
626
+ "step": 870
627
+ },
628
+ {
629
+ "epoch": 0.704,
630
+ "grad_norm": 5.751520156860352,
631
+ "learning_rate": 0.00015266288763616403,
632
+ "loss": 6.6585,
633
+ "step": 880
634
+ },
635
+ {
636
+ "epoch": 0.712,
637
+ "grad_norm": 6.401413440704346,
638
+ "learning_rate": 0.000151543762871954,
639
+ "loss": 6.5739,
640
+ "step": 890
641
+ },
642
+ {
643
+ "epoch": 0.72,
644
+ "grad_norm": 5.519962310791016,
645
+ "learning_rate": 0.0001504157768367901,
646
+ "loss": 6.6899,
647
+ "step": 900
648
  }
649
  ],
650
  "logging_steps": 10,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2cbad370ceca105eb29fd83703abdf3f11645c66605ea050dcf46365bfd8be8
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee90b3fca998c33de74462bb9e763a66d85eea62f73bbd6b86fed468dacef643
3
  size 5905