FredericFan commited on
Commit
a9c8e53
·
verified ·
1 Parent(s): e6fb16a

Training in progress, step 17500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee7142eae13c2e07731af54b545ac51bda5556ea3f592995cf80a6a611f7a001
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8ffabfeec9f6eebd2a7900dd8bf8405bf07a25820f0a7c5d6b4d5a8bfa48293
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8599a0264032f7e486689cdb52a108f56cf68a52b701dfa3ff59464f8da7ad06
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c081cd44daea3b6fc16a00cbe0211572d0c0a312f0060bc10c918341394bda2
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c552c5778e7213237c153e06b58720168ca8baccd54cd6b284f96928b35e9241
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afa26191eaed430ee9b2402f370925cf1a280b0be2f2a361324924659b56d574
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:570514209dc9b87d23b36d54c5948989ae9b03cae5fc9a097914a9ef458ad51f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e544656266b7150eb42d485eede2fd99a780796aabfa23eadeeb94220226471
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08219126611948013,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-17000",
4
- "epoch": 1.3599999999999999,
5
  "eval_steps": 500,
6
- "global_step": 17000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2659,6 +2659,84 @@
2659
  "eval_samples_per_second": 22.714,
2660
  "eval_steps_per_second": 5.679,
2661
  "step": 17000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2662
  }
2663
  ],
2664
  "logging_steps": 50,
@@ -2678,7 +2756,7 @@
2678
  "attributes": {}
2679
  }
2680
  },
2681
- "total_flos": 4.140913655808e+16,
2682
  "train_batch_size": 4,
2683
  "trial_name": null,
2684
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.0821109265089035,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-17500",
4
+ "epoch": 1.4,
5
  "eval_steps": 500,
6
+ "global_step": 17500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2659
  "eval_samples_per_second": 22.714,
2660
  "eval_steps_per_second": 5.679,
2661
  "step": 17000
2662
+ },
2663
+ {
2664
+ "epoch": 1.3639999999999999,
2665
+ "grad_norm": 0.19439847767353058,
2666
+ "learning_rate": 9.5448e-06,
2667
+ "loss": 0.0566,
2668
+ "step": 17050
2669
+ },
2670
+ {
2671
+ "epoch": 1.3679999999999999,
2672
+ "grad_norm": 0.1604561060667038,
2673
+ "learning_rate": 9.4848e-06,
2674
+ "loss": 0.0532,
2675
+ "step": 17100
2676
+ },
2677
+ {
2678
+ "epoch": 1.3719999999999999,
2679
+ "grad_norm": 0.13188685476779938,
2680
+ "learning_rate": 9.4248e-06,
2681
+ "loss": 0.0547,
2682
+ "step": 17150
2683
+ },
2684
+ {
2685
+ "epoch": 1.376,
2686
+ "grad_norm": 0.09787939488887787,
2687
+ "learning_rate": 9.3648e-06,
2688
+ "loss": 0.0476,
2689
+ "step": 17200
2690
+ },
2691
+ {
2692
+ "epoch": 1.38,
2693
+ "grad_norm": 0.3960016369819641,
2694
+ "learning_rate": 9.3048e-06,
2695
+ "loss": 0.0607,
2696
+ "step": 17250
2697
+ },
2698
+ {
2699
+ "epoch": 1.384,
2700
+ "grad_norm": 0.17494530975818634,
2701
+ "learning_rate": 9.2448e-06,
2702
+ "loss": 0.0579,
2703
+ "step": 17300
2704
+ },
2705
+ {
2706
+ "epoch": 1.388,
2707
+ "grad_norm": 0.17870362102985382,
2708
+ "learning_rate": 9.1848e-06,
2709
+ "loss": 0.0481,
2710
+ "step": 17350
2711
+ },
2712
+ {
2713
+ "epoch": 1.392,
2714
+ "grad_norm": 0.1152658611536026,
2715
+ "learning_rate": 9.1248e-06,
2716
+ "loss": 0.0507,
2717
+ "step": 17400
2718
+ },
2719
+ {
2720
+ "epoch": 1.396,
2721
+ "grad_norm": 0.10560191422700882,
2722
+ "learning_rate": 9.0648e-06,
2723
+ "loss": 0.0509,
2724
+ "step": 17450
2725
+ },
2726
+ {
2727
+ "epoch": 1.4,
2728
+ "grad_norm": 0.09202779084444046,
2729
+ "learning_rate": 9.004799999999999e-06,
2730
+ "loss": 0.0571,
2731
+ "step": 17500
2732
+ },
2733
+ {
2734
+ "epoch": 1.4,
2735
+ "eval_loss": 0.0821109265089035,
2736
+ "eval_runtime": 88.0678,
2737
+ "eval_samples_per_second": 22.71,
2738
+ "eval_steps_per_second": 5.677,
2739
+ "step": 17500
2740
  }
2741
  ],
2742
  "logging_steps": 50,
 
2756
  "attributes": {}
2757
  }
2758
  },
2759
+ "total_flos": 4.26270523392e+16,
2760
  "train_batch_size": 4,
2761
  "trial_name": null,
2762
  "trial_params": null