PEFT
Safetensors
Generated from Trainer
paulrichmond committed on
Commit
ce30856
·
verified ·
1 Parent(s): e56c229

Training in progress, step 34284, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e2fec58aad4dd77032730b8c8256d6df9b5b19233a11f86246c94eac4c69ea5
3
  size 18899856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f08bbff40a76729e8bb60d3a42b823dec0374093316e9d66e864d39e6e98686
3
  size 18899856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c71f39d1d94bac3c5d21186154e408fc20c0dedd6c5ed33ec28b414f4a7e1497
3
  size 37911546
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f88b3d3aafcfb52683a2c942900f2cf4331a167f2e907d54ec18889f78810ed1
3
  size 37911546
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75611bb966c18ad34fae0cdfc220a743b52f93a668dd7bdff081240147d9f5dc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b182ddba2191fc0b5d8696aacd13a263502041db09ff96fe195f390f57c46669
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdc430163360c0db4aba3b670b7ed2a96845fe7776799e70cf4e9bd497c9c307
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5f55b79d4f37b170efbcd11ec8e37cc2c97e892e710c70c5c8dbc048d1902a
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.9964998249912496,
5
  "eval_steps": 3806,
6
- "global_step": 34254,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -239965,6 +239965,216 @@
239965
  "eval_test_samples_per_second": 13.909,
239966
  "eval_test_steps_per_second": 0.869,
239967
  "step": 34254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239968
  }
239969
  ],
239970
  "logging_steps": 1,
@@ -239979,12 +240189,12 @@
239979
  "should_evaluate": false,
239980
  "should_log": false,
239981
  "should_save": true,
239982
- "should_training_stop": false
239983
  },
239984
  "attributes": {}
239985
  }
239986
  },
239987
- "total_flos": 8.935297972096205e+18,
239988
  "train_batch_size": 16,
239989
  "trial_name": null,
239990
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 3806,
6
+ "global_step": 34284,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
239965
  "eval_test_samples_per_second": 13.909,
239966
  "eval_test_steps_per_second": 0.869,
239967
  "step": 34254
239968
+ },
239969
+ {
239970
+ "epoch": 3.9966164974915412,
239971
+ "grad_norm": 0.7302963733673096,
239972
+ "learning_rate": 3.000647352429344e-06,
239973
+ "loss": 2.0368,
239974
+ "step": 34255
239975
+ },
239976
+ {
239977
+ "epoch": 3.996733169991833,
239978
+ "grad_norm": 0.7436806559562683,
239979
+ "learning_rate": 3.000603477205233e-06,
239980
+ "loss": 2.1534,
239981
+ "step": 34256
239982
+ },
239983
+ {
239984
+ "epoch": 3.9968498424921246,
239985
+ "grad_norm": 0.6236298084259033,
239986
+ "learning_rate": 3.000561141458568e-06,
239987
+ "loss": 1.9726,
239988
+ "step": 34257
239989
+ },
239990
+ {
239991
+ "epoch": 3.9969665149924163,
239992
+ "grad_norm": 0.6811991930007935,
239993
+ "learning_rate": 3.0005203451897933e-06,
239994
+ "loss": 2.0822,
239995
+ "step": 34258
239996
+ },
239997
+ {
239998
+ "epoch": 3.997083187492708,
239999
+ "grad_norm": 0.7705870866775513,
240000
+ "learning_rate": 3.000481088399339e-06,
240001
+ "loss": 2.1438,
240002
+ "step": 34259
240003
+ },
240004
+ {
240005
+ "epoch": 3.9971998599929996,
240006
+ "grad_norm": 0.6603965759277344,
240007
+ "learning_rate": 3.0004433710876e-06,
240008
+ "loss": 1.9082,
240009
+ "step": 34260
240010
+ },
240011
+ {
240012
+ "epoch": 3.9973165324932913,
240013
+ "grad_norm": 0.6230751872062683,
240014
+ "learning_rate": 3.0004071932549724e-06,
240015
+ "loss": 2.0426,
240016
+ "step": 34261
240017
+ },
240018
+ {
240019
+ "epoch": 3.997433204993583,
240020
+ "grad_norm": 0.7174234390258789,
240021
+ "learning_rate": 3.000372554901835e-06,
240022
+ "loss": 1.992,
240023
+ "step": 34262
240024
+ },
240025
+ {
240026
+ "epoch": 3.9975498774938747,
240027
+ "grad_norm": 0.6722437739372253,
240028
+ "learning_rate": 3.0003394560285347e-06,
240029
+ "loss": 2.0709,
240030
+ "step": 34263
240031
+ },
240032
+ {
240033
+ "epoch": 3.9976665499941664,
240034
+ "grad_norm": 0.641873300075531,
240035
+ "learning_rate": 3.0003078966354333e-06,
240036
+ "loss": 1.9371,
240037
+ "step": 34264
240038
+ },
240039
+ {
240040
+ "epoch": 3.997783222494458,
240041
+ "grad_norm": 0.6292130351066589,
240042
+ "learning_rate": 3.000277876722828e-06,
240043
+ "loss": 1.9375,
240044
+ "step": 34265
240045
+ },
240046
+ {
240047
+ "epoch": 3.9978998949947497,
240048
+ "grad_norm": 0.6383855938911438,
240049
+ "learning_rate": 3.000249396291065e-06,
240050
+ "loss": 2.1935,
240051
+ "step": 34266
240052
+ },
240053
+ {
240054
+ "epoch": 3.9980165674950414,
240055
+ "grad_norm": 0.7069698572158813,
240056
+ "learning_rate": 3.0002224553404246e-06,
240057
+ "loss": 2.0077,
240058
+ "step": 34267
240059
+ },
240060
+ {
240061
+ "epoch": 3.998133239995333,
240062
+ "grad_norm": 0.6327721476554871,
240063
+ "learning_rate": 3.0001970538711872e-06,
240064
+ "loss": 2.0699,
240065
+ "step": 34268
240066
+ },
240067
+ {
240068
+ "epoch": 3.998249912495625,
240069
+ "grad_norm": 0.6597331166267395,
240070
+ "learning_rate": 3.0001731918836162e-06,
240071
+ "loss": 2.0667,
240072
+ "step": 34269
240073
+ },
240074
+ {
240075
+ "epoch": 3.9983665849959165,
240076
+ "grad_norm": 0.661301851272583,
240077
+ "learning_rate": 3.000150869377943e-06,
240078
+ "loss": 1.9734,
240079
+ "step": 34270
240080
+ },
240081
+ {
240082
+ "epoch": 3.998483257496208,
240083
+ "grad_norm": 0.6402481198310852,
240084
+ "learning_rate": 3.000130086354431e-06,
240085
+ "loss": 1.8784,
240086
+ "step": 34271
240087
+ },
240088
+ {
240089
+ "epoch": 3.9985999299965,
240090
+ "grad_norm": 0.7119265198707581,
240091
+ "learning_rate": 3.000110842813261e-06,
240092
+ "loss": 2.1001,
240093
+ "step": 34272
240094
+ },
240095
+ {
240096
+ "epoch": 3.9987166024967915,
240097
+ "grad_norm": 0.636619508266449,
240098
+ "learning_rate": 3.0000931387546646e-06,
240099
+ "loss": 2.2329,
240100
+ "step": 34273
240101
+ },
240102
+ {
240103
+ "epoch": 3.998833274997083,
240104
+ "grad_norm": 0.6781154274940491,
240105
+ "learning_rate": 3.0000769741788074e-06,
240106
+ "loss": 2.0247,
240107
+ "step": 34274
240108
+ },
240109
+ {
240110
+ "epoch": 3.998949947497375,
240111
+ "grad_norm": 0.7756646871566772,
240112
+ "learning_rate": 3.0000623490858523e-06,
240113
+ "loss": 2.0191,
240114
+ "step": 34275
240115
+ },
240116
+ {
240117
+ "epoch": 3.9990666199976665,
240118
+ "grad_norm": 0.6341996192932129,
240119
+ "learning_rate": 3.000049263475966e-06,
240120
+ "loss": 1.9516,
240121
+ "step": 34276
240122
+ },
240123
+ {
240124
+ "epoch": 3.9991832924979582,
240125
+ "grad_norm": 0.6424492001533508,
240126
+ "learning_rate": 3.000037717349279e-06,
240127
+ "loss": 1.9659,
240128
+ "step": 34277
240129
+ },
240130
+ {
240131
+ "epoch": 3.99929996499825,
240132
+ "grad_norm": 0.5968495607376099,
240133
+ "learning_rate": 3.000027710705908e-06,
240134
+ "loss": 1.985,
240135
+ "step": 34278
240136
+ },
240137
+ {
240138
+ "epoch": 3.9994166374985416,
240139
+ "grad_norm": 0.6369019150733948,
240140
+ "learning_rate": 3.00001924354595e-06,
240141
+ "loss": 1.9489,
240142
+ "step": 34279
240143
+ },
240144
+ {
240145
+ "epoch": 3.9995333099988333,
240146
+ "grad_norm": 0.6295581459999084,
240147
+ "learning_rate": 3.0000123158695064e-06,
240148
+ "loss": 1.9638,
240149
+ "step": 34280
240150
+ },
240151
+ {
240152
+ "epoch": 3.999649982499125,
240153
+ "grad_norm": 0.5876207947731018,
240154
+ "learning_rate": 3.0000069276766416e-06,
240155
+ "loss": 1.9769,
240156
+ "step": 34281
240157
+ },
240158
+ {
240159
+ "epoch": 3.9997666549994166,
240160
+ "grad_norm": 0.6616494059562683,
240161
+ "learning_rate": 3.0000030789674052e-06,
240162
+ "loss": 2.0094,
240163
+ "step": 34282
240164
+ },
240165
+ {
240166
+ "epoch": 3.9998833274997083,
240167
+ "grad_norm": 0.6334449052810669,
240168
+ "learning_rate": 3.0000007697418473e-06,
240169
+ "loss": 1.9549,
240170
+ "step": 34283
240171
+ },
240172
+ {
240173
+ "epoch": 4.0,
240174
+ "grad_norm": 0.618880569934845,
240175
+ "learning_rate": 2.9999999999999997e-06,
240176
+ "loss": 1.8764,
240177
+ "step": 34284
240178
  }
240179
  ],
240180
  "logging_steps": 1,
 
240189
  "should_evaluate": false,
240190
  "should_log": false,
240191
  "should_save": true,
240192
+ "should_training_stop": true
240193
  },
240194
  "attributes": {}
240195
  }
240196
  },
240197
+ "total_flos": 8.943124342074507e+18,
240198
  "train_batch_size": 16,
240199
  "trial_name": null,
240200
  "trial_params": null