ljcamargo committed on
Commit
f044ce5
·
verified ·
1 Parent(s): 81a62a6

Training in progress, step 900, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9a757842f36fd94aead624d11ae735bab0021a2cf1d22b12f1d19d8eb3745df
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f464bb34c079c49fc4894f86d25745fe89898016c82a86a608f7319e127060fb
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864f28f2db139c235def5468478555614e1208e6c7e1636a6be1a9a8a84d2903
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a0275491ae162fc646c048a499bb9160456c508f7be5abff3710269e6fdf4a
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee1ff4b6d230d52fbd75a1fdfc717e2baaa7034a01541c1dee54a5bf5dd662d6
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:504b7bc543b9e5f039f6559d07b099507a66c15c86836ff5981e4eee51792c02
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87f9876af7981b4f995b217441438c53a026fb406c344a1e30a18ad2545bd292
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb268d80dcd8f32a0b291e0f300bdc2df3898cc3661ea44beb8067d70741b7b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.32,
6
  "eval_steps": 500,
7
- "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -568,6 +568,76 @@
568
  "learning_rate": 3.4244372990353704e-05,
569
  "loss": 0.7991,
570
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
571
  }
572
  ],
573
  "logging_steps": 10,
@@ -587,7 +657,7 @@
587
  "attributes": {}
588
  }
589
  },
590
- "total_flos": 1.442001474164736e+16,
591
  "train_batch_size": 2,
592
  "trial_name": null,
593
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.36,
6
  "eval_steps": 500,
7
+ "global_step": 900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
568
  "learning_rate": 3.4244372990353704e-05,
569
  "loss": 0.7991,
570
  "step": 800
571
+ },
572
+ {
573
+ "epoch": 0.324,
574
+ "grad_norm": 25.02106475830078,
575
+ "learning_rate": 3.404340836012862e-05,
576
+ "loss": 0.7284,
577
+ "step": 810
578
+ },
579
+ {
580
+ "epoch": 0.328,
581
+ "grad_norm": 14.035198211669922,
582
+ "learning_rate": 3.384244372990354e-05,
583
+ "loss": 0.7589,
584
+ "step": 820
585
+ },
586
+ {
587
+ "epoch": 0.332,
588
+ "grad_norm": 11.368013381958008,
589
+ "learning_rate": 3.364147909967846e-05,
590
+ "loss": 0.7638,
591
+ "step": 830
592
+ },
593
+ {
594
+ "epoch": 0.336,
595
+ "grad_norm": 21.951080322265625,
596
+ "learning_rate": 3.344051446945338e-05,
597
+ "loss": 0.7869,
598
+ "step": 840
599
+ },
600
+ {
601
+ "epoch": 0.34,
602
+ "grad_norm": 17.966073989868164,
603
+ "learning_rate": 3.32395498392283e-05,
604
+ "loss": 0.6792,
605
+ "step": 850
606
+ },
607
+ {
608
+ "epoch": 0.344,
609
+ "grad_norm": 36.02198791503906,
610
+ "learning_rate": 3.3038585209003216e-05,
611
+ "loss": 0.6968,
612
+ "step": 860
613
+ },
614
+ {
615
+ "epoch": 0.348,
616
+ "grad_norm": 32.43560791015625,
617
+ "learning_rate": 3.283762057877814e-05,
618
+ "loss": 0.7523,
619
+ "step": 870
620
+ },
621
+ {
622
+ "epoch": 0.352,
623
+ "grad_norm": 30.29490852355957,
624
+ "learning_rate": 3.263665594855306e-05,
625
+ "loss": 0.6548,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 0.356,
630
+ "grad_norm": 8.957921981811523,
631
+ "learning_rate": 3.243569131832798e-05,
632
+ "loss": 0.7151,
633
+ "step": 890
634
+ },
635
+ {
636
+ "epoch": 0.36,
637
+ "grad_norm": 15.583487510681152,
638
+ "learning_rate": 3.22347266881029e-05,
639
+ "loss": 0.625,
640
+ "step": 900
641
  }
642
  ],
643
  "logging_steps": 10,
 
657
  "attributes": {}
658
  }
659
  },
660
+ "total_flos": 1.6191274912395264e+16,
661
  "train_batch_size": 2,
662
  "trial_name": null,
663
  "trial_params": null