irishprancer commited on
Commit
4e46c6c
·
verified ·
1 Parent(s): b5ded2c

Training in progress, step 3750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e0ee8d2c58b69427f284488a89998c72e8bc4d55f0e05511449056f38778ee4
3
  size 264070024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39266e7c64916445e7e1ffff7265128efcb14a748bf4d12222a6942b9efa8b29
3
  size 264070024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df98e4ef5bda3ea5cd69e656addd52fa660b9f056128429fd1d464f169749690
3
  size 510816186
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f0e7d5ff1fdaf9d83fb8c2078034a345f95639cc95b7cc4e41a9b5157c9280f
3
  size 510816186
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65aab9aa5ddc29e8a011ec08503d5b4039cc9f073875c54a893f84d1988646ad
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c28553b4def846f33c1be5403cbde93da158c64099cf7041a0cc043a46e7afc1
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a323baafebfbff16ec081ee1c71fdd1ae4559d76e02b5c97760983e07befc3
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fee260ff98ca59616802fa3adc6624edb5c906343c2fcd860cb04b54c36a948
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.084647297859192,
3
  "best_model_checkpoint": "./output/checkpoint-2850",
4
- "epoch": 2.5192442267319803,
5
  "eval_steps": 150,
6
- "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2719,6 +2719,119 @@
2719
  "eval_samples_per_second": 14.759,
2720
  "eval_steps_per_second": 14.759,
2721
  "step": 3600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2722
  }
2723
  ],
2724
  "logging_steps": 10,
@@ -2738,7 +2851,7 @@
2738
  "attributes": {}
2739
  }
2740
  },
2741
- "total_flos": 4.1982155608264704e+17,
2742
  "train_batch_size": 16,
2743
  "trial_name": null,
2744
  "trial_params": null
 
1
  {
2
  "best_metric": 1.084647297859192,
3
  "best_model_checkpoint": "./output/checkpoint-2850",
4
+ "epoch": 2.624212736179146,
5
  "eval_steps": 150,
6
+ "global_step": 3750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2719
  "eval_samples_per_second": 14.759,
2720
  "eval_steps_per_second": 14.759,
2721
  "step": 3600
2722
+ },
2723
+ {
2724
+ "epoch": 2.526242127361791,
2725
+ "grad_norm": 5.060881614685059,
2726
+ "learning_rate": 1.8575520105593817e-05,
2727
+ "loss": 0.3923,
2728
+ "step": 3610
2729
+ },
2730
+ {
2731
+ "epoch": 2.5332400279916025,
2732
+ "grad_norm": 6.2594709396362305,
2733
+ "learning_rate": 1.8326822041411524e-05,
2734
+ "loss": 0.4288,
2735
+ "step": 3620
2736
+ },
2737
+ {
2738
+ "epoch": 2.540237928621414,
2739
+ "grad_norm": 3.7940304279327393,
2740
+ "learning_rate": 1.807942593751973e-05,
2741
+ "loss": 0.3647,
2742
+ "step": 3630
2743
+ },
2744
+ {
2745
+ "epoch": 2.5472358292512247,
2746
+ "grad_norm": 5.75860595703125,
2747
+ "learning_rate": 1.783334196340331e-05,
2748
+ "loss": 0.3543,
2749
+ "step": 3640
2750
+ },
2751
+ {
2752
+ "epoch": 2.5542337298810356,
2753
+ "grad_norm": 4.745769500732422,
2754
+ "learning_rate": 1.758858023461059e-05,
2755
+ "loss": 0.3768,
2756
+ "step": 3650
2757
+ },
2758
+ {
2759
+ "epoch": 2.561231630510847,
2760
+ "grad_norm": 5.631198406219482,
2761
+ "learning_rate": 1.7345150812337564e-05,
2762
+ "loss": 0.3826,
2763
+ "step": 3660
2764
+ },
2765
+ {
2766
+ "epoch": 2.5682295311406578,
2767
+ "grad_norm": 5.964677333831787,
2768
+ "learning_rate": 1.7103063703014372e-05,
2769
+ "loss": 0.3529,
2770
+ "step": 3670
2771
+ },
2772
+ {
2773
+ "epoch": 2.575227431770469,
2774
+ "grad_norm": 5.345946311950684,
2775
+ "learning_rate": 1.6862328857893854e-05,
2776
+ "loss": 0.3153,
2777
+ "step": 3680
2778
+ },
2779
+ {
2780
+ "epoch": 2.58222533240028,
2781
+ "grad_norm": 4.739876747131348,
2782
+ "learning_rate": 1.66229561726426e-05,
2783
+ "loss": 0.3521,
2784
+ "step": 3690
2785
+ },
2786
+ {
2787
+ "epoch": 2.589223233030091,
2788
+ "grad_norm": 5.451272964477539,
2789
+ "learning_rate": 1.6384955486934156e-05,
2790
+ "loss": 0.3648,
2791
+ "step": 3700
2792
+ },
2793
+ {
2794
+ "epoch": 2.596221133659902,
2795
+ "grad_norm": 5.133406162261963,
2796
+ "learning_rate": 1.614833658404454e-05,
2797
+ "loss": 0.4097,
2798
+ "step": 3710
2799
+ },
2800
+ {
2801
+ "epoch": 2.603219034289713,
2802
+ "grad_norm": 5.587733745574951,
2803
+ "learning_rate": 1.5913109190450032e-05,
2804
+ "loss": 0.3669,
2805
+ "step": 3720
2806
+ },
2807
+ {
2808
+ "epoch": 2.6102169349195243,
2809
+ "grad_norm": 4.743875026702881,
2810
+ "learning_rate": 1.567928297542749e-05,
2811
+ "loss": 0.3723,
2812
+ "step": 3730
2813
+ },
2814
+ {
2815
+ "epoch": 2.617214835549335,
2816
+ "grad_norm": 5.686123847961426,
2817
+ "learning_rate": 1.544686755065677e-05,
2818
+ "loss": 0.3921,
2819
+ "step": 3740
2820
+ },
2821
+ {
2822
+ "epoch": 2.624212736179146,
2823
+ "grad_norm": 6.688653469085693,
2824
+ "learning_rate": 1.5215872469825682e-05,
2825
+ "loss": 0.4218,
2826
+ "step": 3750
2827
+ },
2828
+ {
2829
+ "epoch": 2.624212736179146,
2830
+ "eval_loss": 1.1890102624893188,
2831
+ "eval_runtime": 33.4721,
2832
+ "eval_samples_per_second": 14.938,
2833
+ "eval_steps_per_second": 14.938,
2834
+ "step": 3750
2835
  }
2836
  ],
2837
  "logging_steps": 10,
 
2851
  "attributes": {}
2852
  }
2853
  },
2854
+ "total_flos": 4.372563304182989e+17,
2855
  "train_batch_size": 16,
2856
  "trial_name": null,
2857
  "trial_params": null