mgh6 commited on
Commit
da70395
·
verified ·
1 Parent(s): a0595c3

Training in progress, epoch 7, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5c3b0654a55323c3fcee30e714ee2d676246a3bfc98cc5f49a21c0197bf7658
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a62b0ae2bece445149d95dac8be318a1e3cf663dc984da1c6c1763a330dde2ae
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebc195ac9c4196031197946a0392714b3efeebd00756085187f31f1fca6860db
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db3b73b9e39e57f14acc8da4a5a2ee7f1b952789fcd7ddf666c5f54ad92c0646
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d98d2e99b55542cf6b1c45f3a424a53b2fc65122f42198a9dbcf07ba8693c50
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21dfc6c263d5ad0f8ba77e03600244b9f2781e61ae66cba4cff3c2ce6c58574f
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13dc37c47f5922c1d1c30342794206ae261a0947af887e4fc89421ae169d8074
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6359143556c6c392e65dff6e223cd73982950d740ede67b2be0a429a6648f6d5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.999195864445721,
5
  "eval_steps": 50,
6
- "global_step": 1904,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -577,6 +577,81 @@
577
  "eval_samples_per_second": 41.593,
578
  "eval_steps_per_second": 20.797,
579
  "step": 1900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
580
  }
581
  ],
582
  "logging_steps": 50,
@@ -596,7 +671,7 @@
596
  "attributes": {}
597
  }
598
  },
599
- "total_flos": 4.912264077125878e+17,
600
  "train_batch_size": 2,
601
  "trial_name": null,
602
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.999080987937967,
5
  "eval_steps": 50,
6
+ "global_step": 2176,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
577
  "eval_samples_per_second": 41.593,
578
  "eval_steps_per_second": 20.797,
579
  "step": 1900
580
+ },
581
+ {
582
+ "epoch": 7.168294083859851,
583
+ "grad_norm": 46.392173767089844,
584
+ "learning_rate": 2.8308823529411766e-05,
585
+ "loss": 0.8475,
586
+ "step": 1950
587
+ },
588
+ {
589
+ "epoch": 7.168294083859851,
590
+ "eval_loss": 1.2355010509490967,
591
+ "eval_runtime": 116.0581,
592
+ "eval_samples_per_second": 41.583,
593
+ "eval_steps_per_second": 20.791,
594
+ "step": 1950
595
+ },
596
+ {
597
+ "epoch": 7.352096496266514,
598
+ "grad_norm": 64.82035827636719,
599
+ "learning_rate": 2.647058823529412e-05,
600
+ "loss": 0.8496,
601
+ "step": 2000
602
+ },
603
+ {
604
+ "epoch": 7.352096496266514,
605
+ "eval_loss": 1.2320975065231323,
606
+ "eval_runtime": 115.9151,
607
+ "eval_samples_per_second": 41.634,
608
+ "eval_steps_per_second": 20.817,
609
+ "step": 2000
610
+ },
611
+ {
612
+ "epoch": 7.535898908673176,
613
+ "grad_norm": 46.27527618408203,
614
+ "learning_rate": 2.4632352941176472e-05,
615
+ "loss": 0.8452,
616
+ "step": 2050
617
+ },
618
+ {
619
+ "epoch": 7.535898908673176,
620
+ "eval_loss": 1.2323057651519775,
621
+ "eval_runtime": 115.981,
622
+ "eval_samples_per_second": 41.61,
623
+ "eval_steps_per_second": 20.805,
624
+ "step": 2050
625
+ },
626
+ {
627
+ "epoch": 7.719701321079839,
628
+ "grad_norm": 52.665435791015625,
629
+ "learning_rate": 2.2794117647058825e-05,
630
+ "loss": 0.8423,
631
+ "step": 2100
632
+ },
633
+ {
634
+ "epoch": 7.719701321079839,
635
+ "eval_loss": 1.2322068214416504,
636
+ "eval_runtime": 115.9188,
637
+ "eval_samples_per_second": 41.633,
638
+ "eval_steps_per_second": 20.816,
639
+ "step": 2100
640
+ },
641
+ {
642
+ "epoch": 7.903503733486502,
643
+ "grad_norm": 74.63914489746094,
644
+ "learning_rate": 2.0955882352941178e-05,
645
+ "loss": 0.8421,
646
+ "step": 2150
647
+ },
648
+ {
649
+ "epoch": 7.903503733486502,
650
+ "eval_loss": 1.2284280061721802,
651
+ "eval_runtime": 115.8808,
652
+ "eval_samples_per_second": 41.646,
653
+ "eval_steps_per_second": 20.823,
654
+ "step": 2150
655
  }
656
  ],
657
  "logging_steps": 50,
 
671
  "attributes": {}
672
  }
673
  },
674
+ "total_flos": 5.614783797825372e+17,
675
  "train_batch_size": 2,
676
  "trial_name": null,
677
  "trial_params": null