FredericFan commited on
Commit
542af67
·
verified ·
1 Parent(s): 32b0af1

Training in progress, step 18000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8ffabfeec9f6eebd2a7900dd8bf8405bf07a25820f0a7c5d6b4d5a8bfa48293
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e87cb14acd4e6fe6bcad897a8f042f31acae6fb2e34f61ed68b17e469e4f393
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c081cd44daea3b6fc16a00cbe0211572d0c0a312f0060bc10c918341394bda2
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e64bb265a77f08e4da4240e30c68e0a0ae7eb01df9530a76fcba45020acfd6b
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afa26191eaed430ee9b2402f370925cf1a280b0be2f2a361324924659b56d574
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea1c5d0777adfd9f21aec03650885c94d8b0325360d164ecc81c3a16d777cc03
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e544656266b7150eb42d485eede2fd99a780796aabfa23eadeeb94220226471
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bc3c023f1bacc02e7db4990bce2636fd592e49ea544612a30431a586cc170fc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0821109265089035,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-17500",
4
- "epoch": 1.4,
5
  "eval_steps": 500,
6
- "global_step": 17500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2737,6 +2737,84 @@
2737
  "eval_samples_per_second": 22.71,
2738
  "eval_steps_per_second": 5.677,
2739
  "step": 17500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2740
  }
2741
  ],
2742
  "logging_steps": 50,
@@ -2756,7 +2834,7 @@
2756
  "attributes": {}
2757
  }
2758
  },
2759
- "total_flos": 4.26270523392e+16,
2760
  "train_batch_size": 4,
2761
  "trial_name": null,
2762
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0821109265089035,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-17500",
4
+ "epoch": 1.44,
5
  "eval_steps": 500,
6
+ "global_step": 18000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2737
  "eval_samples_per_second": 22.71,
2738
  "eval_steps_per_second": 5.677,
2739
  "step": 17500
2740
+ },
2741
+ {
2742
+ "epoch": 1.404,
2743
+ "grad_norm": 0.1634778082370758,
2744
+ "learning_rate": 8.9448e-06,
2745
+ "loss": 0.0532,
2746
+ "step": 17550
2747
+ },
2748
+ {
2749
+ "epoch": 1.408,
2750
+ "grad_norm": 0.12161219865083694,
2751
+ "learning_rate": 8.8848e-06,
2752
+ "loss": 0.0541,
2753
+ "step": 17600
2754
+ },
2755
+ {
2756
+ "epoch": 1.412,
2757
+ "grad_norm": 0.15347328782081604,
2758
+ "learning_rate": 8.824799999999999e-06,
2759
+ "loss": 0.0535,
2760
+ "step": 17650
2761
+ },
2762
+ {
2763
+ "epoch": 1.416,
2764
+ "grad_norm": 0.07917541265487671,
2765
+ "learning_rate": 8.7648e-06,
2766
+ "loss": 0.0552,
2767
+ "step": 17700
2768
+ },
2769
+ {
2770
+ "epoch": 1.42,
2771
+ "grad_norm": 0.15406325459480286,
2772
+ "learning_rate": 8.7048e-06,
2773
+ "loss": 0.0584,
2774
+ "step": 17750
2775
+ },
2776
+ {
2777
+ "epoch": 1.424,
2778
+ "grad_norm": 0.21300417184829712,
2779
+ "learning_rate": 8.6448e-06,
2780
+ "loss": 0.0565,
2781
+ "step": 17800
2782
+ },
2783
+ {
2784
+ "epoch": 1.428,
2785
+ "grad_norm": 0.20691223442554474,
2786
+ "learning_rate": 8.584800000000001e-06,
2787
+ "loss": 0.0504,
2788
+ "step": 17850
2789
+ },
2790
+ {
2791
+ "epoch": 1.432,
2792
+ "grad_norm": 0.1366143375635147,
2793
+ "learning_rate": 8.5248e-06,
2794
+ "loss": 0.0573,
2795
+ "step": 17900
2796
+ },
2797
+ {
2798
+ "epoch": 1.436,
2799
+ "grad_norm": 0.11994505673646927,
2800
+ "learning_rate": 8.4648e-06,
2801
+ "loss": 0.0576,
2802
+ "step": 17950
2803
+ },
2804
+ {
2805
+ "epoch": 1.44,
2806
+ "grad_norm": 0.06461376696825027,
2807
+ "learning_rate": 8.404800000000001e-06,
2808
+ "loss": 0.0595,
2809
+ "step": 18000
2810
+ },
2811
+ {
2812
+ "epoch": 1.44,
2813
+ "eval_loss": 0.08217783272266388,
2814
+ "eval_runtime": 88.0706,
2815
+ "eval_samples_per_second": 22.709,
2816
+ "eval_steps_per_second": 5.677,
2817
+ "step": 18000
2818
  }
2819
  ],
2820
  "logging_steps": 50,
 
2834
  "attributes": {}
2835
  }
2836
  },
2837
+ "total_flos": 4.384496812032e+16,
2838
  "train_batch_size": 4,
2839
  "trial_name": null,
2840
  "trial_params": null