PEFT
Safetensors
Generated from Trainer
paulrichmond committed on
Commit
cb61019
·
verified ·
1 Parent(s): 2f36fe3

Training in progress, step 29560, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:198cf9e2f7b9b672052b35384c96e423fbbed0604b744e435fcce6f796ded998
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44d8b9c5f0b247c388d0afbe8907171a0d81b141a933b1e9361f9bbf2d35e204
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:595036a57cc0d3ee525f8dc47ce9b2dac7c500c9adb6353185e143c6de89824a
3
  size 168150738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f82ac87fc2dd1538493c584b6bfaa34aacaf4f8dd89f4d5c843f7d1407dde93d
3
  size 168150738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e3c64eefa3f7dd16ade383bee02b051776ec0d884de60d623c9f3616c93f904
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef6ee5c960556f49ab0201cef6ec598647c83cf5eebbbd5fbf9582e268f90127
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72573ab543ab6543a4c72300875bab9524df97382858e2308190d2fdd5358e35
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c7fd32361ba3725f77c3361e30c891765755fcbe92da53fa4ea9fbbad5a7c8
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.99702300405954,
5
  "eval_steps": 3282,
6
- "global_step": 29538,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -206953,6 +206953,160 @@
206953
  "eval_test_samples_per_second": 12.934,
206954
  "eval_test_steps_per_second": 0.809,
206955
  "step": 29538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206956
  }
206957
  ],
206958
  "logging_steps": 1,
@@ -206967,12 +207121,12 @@
206967
  "should_evaluate": false,
206968
  "should_log": false,
206969
  "should_save": true,
206970
- "should_training_stop": false
206971
  },
206972
  "attributes": {}
206973
  }
206974
  },
206975
- "total_flos": 7.209966655963202e+18,
206976
  "train_batch_size": 16,
206977
  "trial_name": null,
206978
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 3282,
6
+ "global_step": 29560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
206953
  "eval_test_samples_per_second": 12.934,
206954
  "eval_test_steps_per_second": 0.809,
206955
  "step": 29538
206956
+ },
206957
+ {
206958
+ "epoch": 3.9971583220568334,
206959
+ "grad_norm": 1.4479291439056396,
206960
+ "learning_rate": 3.0004566050305867e-06,
206961
+ "loss": 1.7956,
206962
+ "step": 29539
206963
+ },
206964
+ {
206965
+ "epoch": 3.997293640054127,
206966
+ "grad_norm": 1.3905441761016846,
206967
+ "learning_rate": 3.000414154242476e-06,
206968
+ "loss": 1.8403,
206969
+ "step": 29540
206970
+ },
206971
+ {
206972
+ "epoch": 3.9974289580514206,
206973
+ "grad_norm": 1.563751220703125,
206974
+ "learning_rate": 3.0003737742207762e-06,
206975
+ "loss": 1.9486,
206976
+ "step": 29541
206977
+ },
206978
+ {
206979
+ "epoch": 3.9975642760487142,
206980
+ "grad_norm": 1.3970868587493896,
206981
+ "learning_rate": 3.000335464966031e-06,
206982
+ "loss": 1.9456,
206983
+ "step": 29542
206984
+ },
206985
+ {
206986
+ "epoch": 3.9976995940460083,
206987
+ "grad_norm": 1.3398752212524414,
206988
+ "learning_rate": 3.000299226478785e-06,
206989
+ "loss": 1.8753,
206990
+ "step": 29543
206991
+ },
206992
+ {
206993
+ "epoch": 3.997834912043302,
206994
+ "grad_norm": 1.3150476217269897,
206995
+ "learning_rate": 3.0002650587595492e-06,
206996
+ "loss": 1.9388,
206997
+ "step": 29544
206998
+ },
206999
+ {
207000
+ "epoch": 3.9979702300405955,
207001
+ "grad_norm": 1.5918689966201782,
207002
+ "learning_rate": 3.0002329618087684e-06,
207003
+ "loss": 1.917,
207004
+ "step": 29545
207005
+ },
207006
+ {
207007
+ "epoch": 3.998105548037889,
207008
+ "grad_norm": 1.2473838329315186,
207009
+ "learning_rate": 3.0002029356269215e-06,
207010
+ "loss": 1.9181,
207011
+ "step": 29546
207012
+ },
207013
+ {
207014
+ "epoch": 3.9982408660351827,
207015
+ "grad_norm": 1.546493649482727,
207016
+ "learning_rate": 3.000174980214419e-06,
207017
+ "loss": 2.154,
207018
+ "step": 29547
207019
+ },
207020
+ {
207021
+ "epoch": 3.9983761840324763,
207022
+ "grad_norm": 1.1951613426208496,
207023
+ "learning_rate": 3.0001490955716424e-06,
207024
+ "loss": 1.7965,
207025
+ "step": 29548
207026
+ },
207027
+ {
207028
+ "epoch": 3.99851150202977,
207029
+ "grad_norm": 1.4026210308074951,
207030
+ "learning_rate": 3.000125281698969e-06,
207031
+ "loss": 1.9001,
207032
+ "step": 29549
207033
+ },
207034
+ {
207035
+ "epoch": 3.9986468200270635,
207036
+ "grad_norm": 1.2616246938705444,
207037
+ "learning_rate": 3.000103538596713e-06,
207038
+ "loss": 1.9108,
207039
+ "step": 29550
207040
+ },
207041
+ {
207042
+ "epoch": 3.998782138024357,
207043
+ "grad_norm": 1.3047972917556763,
207044
+ "learning_rate": 3.000083866265187e-06,
207045
+ "loss": 1.9664,
207046
+ "step": 29551
207047
+ },
207048
+ {
207049
+ "epoch": 3.9989174560216507,
207050
+ "grad_norm": 1.5460106134414673,
207051
+ "learning_rate": 3.0000662647046716e-06,
207052
+ "loss": 2.0249,
207053
+ "step": 29552
207054
+ },
207055
+ {
207056
+ "epoch": 3.9990527740189448,
207057
+ "grad_norm": 1.267030119895935,
207058
+ "learning_rate": 3.0000507339153984e-06,
207059
+ "loss": 1.9507,
207060
+ "step": 29553
207061
+ },
207062
+ {
207063
+ "epoch": 3.9991880920162384,
207064
+ "grad_norm": 1.4216227531433105,
207065
+ "learning_rate": 3.000037273897597e-06,
207066
+ "loss": 1.9319,
207067
+ "step": 29554
207068
+ },
207069
+ {
207070
+ "epoch": 3.999323410013532,
207071
+ "grad_norm": 1.4820796251296997,
207072
+ "learning_rate": 3.0000258846514327e-06,
207073
+ "loss": 2.0095,
207074
+ "step": 29555
207075
+ },
207076
+ {
207077
+ "epoch": 3.9994587280108256,
207078
+ "grad_norm": 1.5175132751464844,
207079
+ "learning_rate": 3.000016566177087e-06,
207080
+ "loss": 1.853,
207081
+ "step": 29556
207082
+ },
207083
+ {
207084
+ "epoch": 3.999594046008119,
207085
+ "grad_norm": 1.2719131708145142,
207086
+ "learning_rate": 3.000009318474692e-06,
207087
+ "loss": 1.7825,
207088
+ "step": 29557
207089
+ },
207090
+ {
207091
+ "epoch": 3.999729364005413,
207092
+ "grad_norm": 1.266430377960205,
207093
+ "learning_rate": 3.000004141544329e-06,
207094
+ "loss": 1.7838,
207095
+ "step": 29558
207096
+ },
207097
+ {
207098
+ "epoch": 3.9998646820027064,
207099
+ "grad_norm": 1.3060740232467651,
207100
+ "learning_rate": 3.0000010353860824e-06,
207101
+ "loss": 1.8332,
207102
+ "step": 29559
207103
+ },
207104
+ {
207105
+ "epoch": 4.0,
207106
+ "grad_norm": 3.18343448638916,
207107
+ "learning_rate": 2.9999999999999997e-06,
207108
+ "loss": 1.9557,
207109
+ "step": 29560
207110
  }
207111
  ],
207112
  "logging_steps": 1,
 
207121
  "should_evaluate": false,
207122
  "should_log": false,
207123
  "should_save": true,
207124
+ "should_training_stop": true
207125
  },
207126
  "attributes": {}
207127
  }
207128
  },
207129
+ "total_flos": 7.215024419447243e+18,
207130
  "train_batch_size": 16,
207131
  "trial_name": null,
207132
  "trial_params": null