vng08 commited on
Commit
52ddebe
·
verified ·
1 Parent(s): 371127e

Training in progress, step 714, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94ab2986e69624e622096aab328ef20778587626cab78a308484e77c04209819
3
  size 310662536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294e8fe234cd6d1f59b14c1140e50fdbdb90a57220e6173c48a127e84e49c4e8
3
  size 310662536
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5115b34b8603ed8f6633b921855aa63724e8ce7221c16e97f80b58a14075229
3
  size 162452055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31fca9108c676adcfdd8ed80962605abd4605ab61e005b4a771e012d23b1c627
3
  size 162452055
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:850c3d909f8a0af6f9b431fac5a25833ab1658c39f899825e3b347b6af8a490b
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb52ddc7b4d4702afb8bac65566641a27c974b1518c3f2f4987cdc6cc976a909
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af8f5e8caef6c235c2a73a42bf19f55da6d027796053c084f7bda2a06e133e53
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e5d5ca4d0f7bab3012c2ed7b0a337ccf31a6ebdeb831e6c0b5b1e71cabedc08
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9810791871058164,
6
  "eval_steps": 500,
7
- "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4908,6 +4908,104 @@
4908
  "learning_rate": 4.2313117066290545e-06,
4909
  "loss": 0.5183,
4910
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4911
  }
4912
  ],
4913
  "logging_steps": 1,
@@ -4922,12 +5020,12 @@
4922
  "should_evaluate": false,
4923
  "should_log": false,
4924
  "should_save": true,
4925
- "should_training_stop": false
4926
  },
4927
  "attributes": {}
4928
  }
4929
  },
4930
- "total_flos": 8.040071266760909e+16,
4931
  "train_batch_size": 2,
4932
  "trial_name": null,
4933
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 714,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4908
  "learning_rate": 4.2313117066290545e-06,
4909
  "loss": 0.5183,
4910
  "step": 700
4911
+ },
4912
+ {
4913
+ "epoch": 0.9824807288016818,
4914
+ "grad_norm": 1.2964062690734863,
4915
+ "learning_rate": 3.949224259520452e-06,
4916
+ "loss": 0.45,
4917
+ "step": 701
4918
+ },
4919
+ {
4920
+ "epoch": 0.9838822704975473,
4921
+ "grad_norm": 1.2322115898132324,
4922
+ "learning_rate": 3.667136812411848e-06,
4923
+ "loss": 0.3094,
4924
+ "step": 702
4925
+ },
4926
+ {
4927
+ "epoch": 0.9852838121934128,
4928
+ "grad_norm": 1.0808106660842896,
4929
+ "learning_rate": 3.3850493653032446e-06,
4930
+ "loss": 0.2605,
4931
+ "step": 703
4932
+ },
4933
+ {
4934
+ "epoch": 0.9866853538892782,
4935
+ "grad_norm": 1.4192579984664917,
4936
+ "learning_rate": 3.1029619181946405e-06,
4937
+ "loss": 0.3415,
4938
+ "step": 704
4939
+ },
4940
+ {
4941
+ "epoch": 0.9880868955851436,
4942
+ "grad_norm": 1.0820287466049194,
4943
+ "learning_rate": 2.8208744710860367e-06,
4944
+ "loss": 0.2336,
4945
+ "step": 705
4946
+ },
4947
+ {
4948
+ "epoch": 0.9894884372810091,
4949
+ "grad_norm": 1.4534846544265747,
4950
+ "learning_rate": 2.538787023977433e-06,
4951
+ "loss": 0.4909,
4952
+ "step": 706
4953
+ },
4954
+ {
4955
+ "epoch": 0.9908899789768746,
4956
+ "grad_norm": 1.3459112644195557,
4957
+ "learning_rate": 2.2566995768688293e-06,
4958
+ "loss": 0.3371,
4959
+ "step": 707
4960
+ },
4961
+ {
4962
+ "epoch": 0.9922915206727401,
4963
+ "grad_norm": 1.2415499687194824,
4964
+ "learning_rate": 1.974612129760226e-06,
4965
+ "loss": 0.2859,
4966
+ "step": 708
4967
+ },
4968
+ {
4969
+ "epoch": 0.9936930623686054,
4970
+ "grad_norm": 1.022193431854248,
4971
+ "learning_rate": 1.6925246826516223e-06,
4972
+ "loss": 0.3233,
4973
+ "step": 709
4974
+ },
4975
+ {
4976
+ "epoch": 0.9950946040644709,
4977
+ "grad_norm": 1.0108222961425781,
4978
+ "learning_rate": 1.4104372355430184e-06,
4979
+ "loss": 0.2245,
4980
+ "step": 710
4981
+ },
4982
+ {
4983
+ "epoch": 0.9964961457603364,
4984
+ "grad_norm": 1.4070162773132324,
4985
+ "learning_rate": 1.1283497884344147e-06,
4986
+ "loss": 0.4077,
4987
+ "step": 711
4988
+ },
4989
+ {
4990
+ "epoch": 0.9978976874562018,
4991
+ "grad_norm": 1.2831224203109741,
4992
+ "learning_rate": 8.462623413258111e-07,
4993
+ "loss": 0.4383,
4994
+ "step": 712
4995
+ },
4996
+ {
4997
+ "epoch": 0.9992992291520673,
4998
+ "grad_norm": 1.686324119567871,
4999
+ "learning_rate": 5.641748942172073e-07,
5000
+ "loss": 0.5735,
5001
+ "step": 713
5002
+ },
5003
+ {
5004
+ "epoch": 1.0,
5005
+ "grad_norm": 1.848501205444336,
5006
+ "learning_rate": 2.8208744710860366e-07,
5007
+ "loss": 0.429,
5008
+ "step": 714
5009
  }
5010
  ],
5011
  "logging_steps": 1,
 
5020
  "should_evaluate": false,
5021
  "should_log": false,
5022
  "should_save": true,
5023
+ "should_training_stop": true
5024
  },
5025
  "attributes": {}
5026
  }
5027
  },
5028
+ "total_flos": 8.19498707616338e+16,
5029
  "train_batch_size": 2,
5030
  "trial_name": null,
5031
  "trial_params": null