PEFT
Safetensors
Generated from Trainer
paulrichmond commited on
Commit
2c83da9
·
verified ·
1 Parent(s): 72e9d62

Training in progress, step 52728, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0de6285001bd8047f2168df2de913ecc009dcd5eaeb79d5dc0446d44fa60aff2
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba6684257e283f99e4654fa6c675e02d7147bd1823d28457e21845ce8ee1211c
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0092129d1c1becb33ccbfa9d8abf6fa6c26c43df945b0d8efc4d43888081e423
3
  size 168150738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5105fef59c6f148a4647a6d396eee790d7ef1df177c316936d04fa3cb36d6d8
3
  size 168150738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:838d077578cf57146d97930a4c665e7072d38b82b8ba980e0abc55d61872f263
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0349d2fe2a8c07a922fcdd7b869bf30ef13bdfa2a6c39cdb10a1e22ecb0cae68
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5d026ee22720d4c5dfda2ca21e8307fef2c9ea235f7efec09ae3c42586de386
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:195e1c218824e864593415afe68e9e8127e01aea101782826593ecf3daaaf683
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.996131087847064,
5
  "eval_steps": 5853,
6
- "global_step": 52677,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -368926,6 +368926,363 @@
368926
  "eval_test_samples_per_second": 12.628,
368927
  "eval_test_steps_per_second": 0.789,
368928
  "step": 52677
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368929
  }
368930
  ],
368931
  "logging_steps": 1,
@@ -368940,12 +369297,12 @@
368940
  "should_evaluate": false,
368941
  "should_log": false,
368942
  "should_save": true,
368943
- "should_training_stop": false
368944
  },
368945
  "attributes": {}
368946
  }
368947
  },
368948
- "total_flos": 1.3312906253126074e+19,
368949
  "train_batch_size": 16,
368950
  "trial_name": null,
368951
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 5853,
6
+ "global_step": 52728,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
368926
  "eval_test_samples_per_second": 12.628,
368927
  "eval_test_steps_per_second": 0.789,
368928
  "step": 52677
368929
+ },
368930
+ {
368931
+ "epoch": 3.9962069488696708,
368932
+ "grad_norm": 1.1055113077163696,
368933
+ "learning_rate": 3.0008135267917343e-06,
368934
+ "loss": 2.2484,
368935
+ "step": 52678
368936
+ },
368937
+ {
368938
+ "epoch": 3.9962828098922776,
368939
+ "grad_norm": 1.020114541053772,
368940
+ "learning_rate": 3.000781311159022e-06,
368941
+ "loss": 2.2629,
368942
+ "step": 52679
368943
+ },
368944
+ {
368945
+ "epoch": 3.996358670914884,
368946
+ "grad_norm": 0.8700923323631287,
368947
+ "learning_rate": 3.0007497463449255e-06,
368948
+ "loss": 2.1114,
368949
+ "step": 52680
368950
+ },
368951
+ {
368952
+ "epoch": 3.9964345319374903,
368953
+ "grad_norm": 1.25503671169281,
368954
+ "learning_rate": 3.0007188323495756e-06,
368955
+ "loss": 2.1506,
368956
+ "step": 52681
368957
+ },
368958
+ {
368959
+ "epoch": 3.996510392960097,
368960
+ "grad_norm": 1.2695883512496948,
368961
+ "learning_rate": 3.0006885691730872e-06,
368962
+ "loss": 2.226,
368963
+ "step": 52682
368964
+ },
368965
+ {
368966
+ "epoch": 3.996586253982704,
368967
+ "grad_norm": 1.0582082271575928,
368968
+ "learning_rate": 3.0006589568156268e-06,
368969
+ "loss": 2.2531,
368970
+ "step": 52683
368971
+ },
368972
+ {
368973
+ "epoch": 3.9966621150053103,
368974
+ "grad_norm": 0.9807246327400208,
368975
+ "learning_rate": 3.000629995277292e-06,
368976
+ "loss": 2.1907,
368977
+ "step": 52684
368978
+ },
368979
+ {
368980
+ "epoch": 3.9967379760279167,
368981
+ "grad_norm": 0.9967033863067627,
368982
+ "learning_rate": 3.0006016845582158e-06,
368983
+ "loss": 2.0615,
368984
+ "step": 52685
368985
+ },
368986
+ {
368987
+ "epoch": 3.9968138370505235,
368988
+ "grad_norm": 1.1809074878692627,
368989
+ "learning_rate": 3.0005740246585456e-06,
368990
+ "loss": 2.1231,
368991
+ "step": 52686
368992
+ },
368993
+ {
368994
+ "epoch": 3.99688969807313,
368995
+ "grad_norm": 0.9519857168197632,
368996
+ "learning_rate": 3.0005470155783807e-06,
368997
+ "loss": 2.2006,
368998
+ "step": 52687
368999
+ },
369000
+ {
369001
+ "epoch": 3.9969655590957367,
369002
+ "grad_norm": 1.0005104541778564,
369003
+ "learning_rate": 3.000520657317853e-06,
369004
+ "loss": 2.2421,
369005
+ "step": 52688
369006
+ },
369007
+ {
369008
+ "epoch": 3.997041420118343,
369009
+ "grad_norm": 0.9390363097190857,
369010
+ "learning_rate": 3.0004949498770612e-06,
369011
+ "loss": 2.2009,
369012
+ "step": 52689
369013
+ },
369014
+ {
369015
+ "epoch": 3.99711728114095,
369016
+ "grad_norm": 1.043871521949768,
369017
+ "learning_rate": 3.0004698932561214e-06,
369018
+ "loss": 2.0931,
369019
+ "step": 52690
369020
+ },
369021
+ {
369022
+ "epoch": 3.9971931421635563,
369023
+ "grad_norm": 0.9061468839645386,
369024
+ "learning_rate": 3.0004454874551646e-06,
369025
+ "loss": 2.0976,
369026
+ "step": 52691
369027
+ },
369028
+ {
369029
+ "epoch": 3.997269003186163,
369030
+ "grad_norm": 1.4057188034057617,
369031
+ "learning_rate": 3.000421732474274e-06,
369032
+ "loss": 2.3907,
369033
+ "step": 52692
369034
+ },
369035
+ {
369036
+ "epoch": 3.9973448642087694,
369037
+ "grad_norm": 1.1326065063476562,
369038
+ "learning_rate": 3.0003986283135803e-06,
369039
+ "loss": 2.2332,
369040
+ "step": 52693
369041
+ },
369042
+ {
369043
+ "epoch": 3.9974207252313763,
369044
+ "grad_norm": 1.048019528388977,
369045
+ "learning_rate": 3.0003761749731514e-06,
369046
+ "loss": 2.0218,
369047
+ "step": 52694
369048
+ },
369049
+ {
369050
+ "epoch": 3.9974965862539826,
369051
+ "grad_norm": 0.9427198767662048,
369052
+ "learning_rate": 3.0003543724531006e-06,
369053
+ "loss": 2.0542,
369054
+ "step": 52695
369055
+ },
369056
+ {
369057
+ "epoch": 3.9975724472765894,
369058
+ "grad_norm": 1.1140172481536865,
369059
+ "learning_rate": 3.0003332207535282e-06,
369060
+ "loss": 2.2512,
369061
+ "step": 52696
369062
+ },
369063
+ {
369064
+ "epoch": 3.997648308299196,
369065
+ "grad_norm": 1.0869436264038086,
369066
+ "learning_rate": 3.000312719874516e-06,
369067
+ "loss": 2.262,
369068
+ "step": 52697
369069
+ },
369070
+ {
369071
+ "epoch": 3.9977241693218026,
369072
+ "grad_norm": 0.9387947916984558,
369073
+ "learning_rate": 3.000292869816164e-06,
369074
+ "loss": 2.1539,
369075
+ "step": 52698
369076
+ },
369077
+ {
369078
+ "epoch": 3.997800030344409,
369079
+ "grad_norm": 0.8832263946533203,
369080
+ "learning_rate": 3.0002736705785535e-06,
369081
+ "loss": 2.4051,
369082
+ "step": 52699
369083
+ },
369084
+ {
369085
+ "epoch": 3.9978758913670154,
369086
+ "grad_norm": 1.0139085054397583,
369087
+ "learning_rate": 3.0002551221617832e-06,
369088
+ "loss": 1.9968,
369089
+ "step": 52700
369090
+ },
369091
+ {
369092
+ "epoch": 3.997951752389622,
369093
+ "grad_norm": 1.092558741569519,
369094
+ "learning_rate": 3.000237224565903e-06,
369095
+ "loss": 2.2382,
369096
+ "step": 52701
369097
+ },
369098
+ {
369099
+ "epoch": 3.998027613412229,
369100
+ "grad_norm": 1.0027174949645996,
369101
+ "learning_rate": 3.000219977791029e-06,
369102
+ "loss": 2.0828,
369103
+ "step": 52702
369104
+ },
369105
+ {
369106
+ "epoch": 3.9981034744348354,
369107
+ "grad_norm": 0.9388405084609985,
369108
+ "learning_rate": 3.00020338183721e-06,
369109
+ "loss": 2.0502,
369110
+ "step": 52703
369111
+ },
369112
+ {
369113
+ "epoch": 3.9981793354574418,
369114
+ "grad_norm": 0.9701755046844482,
369115
+ "learning_rate": 3.000187436704528e-06,
369116
+ "loss": 2.109,
369117
+ "step": 52704
369118
+ },
369119
+ {
369120
+ "epoch": 3.9982551964800486,
369121
+ "grad_norm": 0.9503781795501709,
369122
+ "learning_rate": 3.0001721423930496e-06,
369123
+ "loss": 2.169,
369124
+ "step": 52705
369125
+ },
369126
+ {
369127
+ "epoch": 3.9983310575026554,
369128
+ "grad_norm": 1.0426063537597656,
369129
+ "learning_rate": 3.000157498902841e-06,
369130
+ "loss": 2.1868,
369131
+ "step": 52706
369132
+ },
369133
+ {
369134
+ "epoch": 3.9984069185252618,
369135
+ "grad_norm": 0.9852115511894226,
369136
+ "learning_rate": 3.000143506233984e-06,
369137
+ "loss": 2.25,
369138
+ "step": 52707
369139
+ },
369140
+ {
369141
+ "epoch": 3.998482779547868,
369142
+ "grad_norm": 0.8936397433280945,
369143
+ "learning_rate": 3.0001301643865117e-06,
369144
+ "loss": 2.0757,
369145
+ "step": 52708
369146
+ },
369147
+ {
369148
+ "epoch": 3.998558640570475,
369149
+ "grad_norm": 0.9924890398979187,
369150
+ "learning_rate": 3.00011747336049e-06,
369151
+ "loss": 2.2185,
369152
+ "step": 52709
369153
+ },
369154
+ {
369155
+ "epoch": 3.9986345015930813,
369156
+ "grad_norm": 1.0129131078720093,
369157
+ "learning_rate": 3.0001054331560014e-06,
369158
+ "loss": 2.1824,
369159
+ "step": 52710
369160
+ },
369161
+ {
369162
+ "epoch": 3.998710362615688,
369163
+ "grad_norm": 0.9240451455116272,
369164
+ "learning_rate": 3.0000940437730624e-06,
369165
+ "loss": 2.0595,
369166
+ "step": 52711
369167
+ },
369168
+ {
369169
+ "epoch": 3.9987862236382945,
369170
+ "grad_norm": 1.037165641784668,
369171
+ "learning_rate": 3.0000833052117394e-06,
369172
+ "loss": 2.1509,
369173
+ "step": 52712
369174
+ },
369175
+ {
369176
+ "epoch": 3.9988620846609013,
369177
+ "grad_norm": 0.9835069179534912,
369178
+ "learning_rate": 3.000073217472098e-06,
369179
+ "loss": 2.0827,
369180
+ "step": 52713
369181
+ },
369182
+ {
369183
+ "epoch": 3.9989379456835077,
369184
+ "grad_norm": 1.0959041118621826,
369185
+ "learning_rate": 3.000063780554138e-06,
369186
+ "loss": 2.1468,
369187
+ "step": 52714
369188
+ },
369189
+ {
369190
+ "epoch": 3.9990138067061145,
369191
+ "grad_norm": 1.2777659893035889,
369192
+ "learning_rate": 3.000054994457942e-06,
369193
+ "loss": 2.3319,
369194
+ "step": 52715
369195
+ },
369196
+ {
369197
+ "epoch": 3.999089667728721,
369198
+ "grad_norm": 0.9920614957809448,
369199
+ "learning_rate": 3.0000468591835265e-06,
369200
+ "loss": 2.1972,
369201
+ "step": 52716
369202
+ },
369203
+ {
369204
+ "epoch": 3.9991655287513277,
369205
+ "grad_norm": 1.0651792287826538,
369206
+ "learning_rate": 3.000039374730924e-06,
369207
+ "loss": 2.0964,
369208
+ "step": 52717
369209
+ },
369210
+ {
369211
+ "epoch": 3.999241389773934,
369212
+ "grad_norm": 1.1940739154815674,
369213
+ "learning_rate": 3.000032541100185e-06,
369214
+ "loss": 1.9985,
369215
+ "step": 52718
369216
+ },
369217
+ {
369218
+ "epoch": 3.999317250796541,
369219
+ "grad_norm": 0.9705497026443481,
369220
+ "learning_rate": 3.0000263582913414e-06,
369221
+ "loss": 2.008,
369222
+ "step": 52719
369223
+ },
369224
+ {
369225
+ "epoch": 3.9993931118191473,
369226
+ "grad_norm": 1.0034539699554443,
369227
+ "learning_rate": 3.000020826304394e-06,
369228
+ "loss": 2.0824,
369229
+ "step": 52720
369230
+ },
369231
+ {
369232
+ "epoch": 3.999468972841754,
369233
+ "grad_norm": 1.047110915184021,
369234
+ "learning_rate": 3.0000159451393913e-06,
369235
+ "loss": 2.0688,
369236
+ "step": 52721
369237
+ },
369238
+ {
369239
+ "epoch": 3.9995448338643604,
369240
+ "grad_norm": 0.9343622922897339,
369241
+ "learning_rate": 3.000011714796335e-06,
369242
+ "loss": 2.2584,
369243
+ "step": 52722
369244
+ },
369245
+ {
369246
+ "epoch": 3.999620694886967,
369247
+ "grad_norm": 1.0422899723052979,
369248
+ "learning_rate": 3.0000081352752726e-06,
369249
+ "loss": 2.1829,
369250
+ "step": 52723
369251
+ },
369252
+ {
369253
+ "epoch": 3.9996965559095736,
369254
+ "grad_norm": 1.077273964881897,
369255
+ "learning_rate": 3.0000052065761888e-06,
369256
+ "loss": 2.0929,
369257
+ "step": 52724
369258
+ },
369259
+ {
369260
+ "epoch": 3.9997724169321804,
369261
+ "grad_norm": 0.9545094966888428,
369262
+ "learning_rate": 3.0000029286991165e-06,
369263
+ "loss": 2.1265,
369264
+ "step": 52725
369265
+ },
369266
+ {
369267
+ "epoch": 3.999848277954787,
369268
+ "grad_norm": 0.9401389956474304,
369269
+ "learning_rate": 3.0000013016440555e-06,
369270
+ "loss": 1.9835,
369271
+ "step": 52726
369272
+ },
369273
+ {
369274
+ "epoch": 3.999924138977393,
369275
+ "grad_norm": 0.9990488290786743,
369276
+ "learning_rate": 3.0000003254110053e-06,
369277
+ "loss": 2.2038,
369278
+ "step": 52727
369279
+ },
369280
+ {
369281
+ "epoch": 4.0,
369282
+ "grad_norm": 1.4670413732528687,
369283
+ "learning_rate": 2.9999999999999997e-06,
369284
+ "loss": 1.9777,
369285
+ "step": 52728
369286
  }
369287
  ],
369288
  "logging_steps": 1,
 
369297
  "should_evaluate": false,
369298
  "should_log": false,
369299
  "should_save": true,
369300
+ "should_training_stop": true
369301
  },
369302
  "attributes": {}
369303
  }
369304
  },
369305
+ "total_flos": 1.3325515079398982e+19,
369306
  "train_batch_size": 16,
369307
  "trial_name": null,
369308
  "trial_params": null