ejunlee committed on
Commit
baca8a9
·
1 Parent(s): 024e4fa

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +0 -13
  2. optimizer.pt +2 -2
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +177 -27
README.md CHANGED
@@ -4,18 +4,6 @@ library_name: peft
4
  ## Training procedure
5
 
6
 
7
- The following `bitsandbytes` quantization config was used during training:
8
- - quant_method: bitsandbytes
9
- - load_in_8bit: False
10
- - load_in_4bit: True
11
- - llm_int8_threshold: 6.0
12
- - llm_int8_skip_modules: None
13
- - llm_int8_enable_fp32_cpu_offload: False
14
- - llm_int8_has_fp16_weight: False
15
- - bnb_4bit_quant_type: fp4
16
- - bnb_4bit_use_double_quant: False
17
- - bnb_4bit_compute_dtype: float16
18
-
19
  The following `bitsandbytes` quantization config was used during training:
20
  - quant_method: bitsandbytes
21
  - load_in_8bit: False
@@ -29,6 +17,5 @@ The following `bitsandbytes` quantization config was used during training:
29
  - bnb_4bit_compute_dtype: float16
30
  ### Framework versions
31
 
32
- - PEFT 0.4.0
33
 
34
  - PEFT 0.4.0
 
4
  ## Training procedure
5
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  The following `bitsandbytes` quantization config was used during training:
8
  - quant_method: bitsandbytes
9
  - load_in_8bit: False
 
17
  - bnb_4bit_compute_dtype: float16
18
  ### Framework versions
19
 
 
20
 
21
  - PEFT 0.4.0
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:441ed8437ef696fca88f87327b562c9340116b1441f6c8d2c8d50fc293f0f392
3
- size 63564410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:868f5d86fc275789c060248405ac631b57ec129ab94e0620f2afcb7c25733ddd
3
+ size 16308576
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:875acd07fbaa252f994f5aa2f25ef1f4bdd0643009b8c06c5b0ae9b034919328
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64d2339224b4468b2fd7540559183bd139477eaccc037e756e8392f83c137ce5
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2b9a4abb47b1f7cc1dfd2e4035b39611a01db8ddaa04fd2cde539012db4dec1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65e28920f1b4bd7ff8778a8ef5cd8795cbe7056eb9339aefdd67bef14c405f19
3
  size 1064
trainer_state.json CHANGED
@@ -3,65 +3,215 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 4420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.11,
13
- "learning_rate": 0.00019375130200295878,
14
- "loss": 0.4073,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.23,
19
- "learning_rate": 0.0001757861325449997,
20
- "loss": 0.4069,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.34,
25
- "learning_rate": 0.00014834966999429178,
26
- "loss": 0.4009,
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 0.45,
31
- "learning_rate": 0.00011487075772256517,
32
- "loss": 0.3973,
33
  "step": 2000
34
  },
35
  {
36
- "epoch": 0.57,
37
- "learning_rate": 7.953338797092902e-05,
38
- "loss": 0.3955,
39
  "step": 2500
40
  },
41
  {
42
- "epoch": 0.68,
43
- "learning_rate": 4.6753811771138364e-05,
44
- "loss": 0.3905,
45
  "step": 3000
46
  },
47
  {
48
- "epoch": 0.79,
49
- "learning_rate": 2.062862256606306e-05,
50
- "loss": 0.3881,
51
  "step": 3500
52
  },
53
  {
54
- "epoch": 0.9,
55
- "learning_rate": 4.422788704864633e-06,
56
- "loss": 0.3841,
57
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
  ],
60
  "logging_steps": 500,
61
- "max_steps": 4420,
62
  "num_train_epochs": 1,
63
  "save_steps": 500,
64
- "total_flos": 3.644311294921707e+17,
65
  "trial_name": null,
66
  "trial_params": null
67
  }
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 16844,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03,
13
+ "learning_rate": 0.00019956548524376711,
14
+ "loss": 0.3719,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.06,
19
+ "learning_rate": 0.0001982657170365362,
20
+ "loss": 0.3765,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 0.09,
25
+ "learning_rate": 0.00019611199074762167,
26
+ "loss": 0.3756,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 0.12,
31
+ "learning_rate": 0.000193123022894092,
32
+ "loss": 0.3751,
33
  "step": 2000
34
  },
35
  {
36
+ "epoch": 0.15,
37
+ "learning_rate": 0.00018932478848871238,
38
+ "loss": 0.376,
39
  "step": 2500
40
  },
41
  {
42
+ "epoch": 0.18,
43
+ "learning_rate": 0.00018475029530941827,
44
+ "loss": 0.3765,
45
  "step": 3000
46
  },
47
  {
48
+ "epoch": 0.21,
49
+ "learning_rate": 0.00017943929705198342,
50
+ "loss": 0.3737,
51
  "step": 3500
52
  },
53
  {
54
+ "epoch": 0.24,
55
+ "learning_rate": 0.00017343794785867154,
56
+ "loss": 0.37,
57
  "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.27,
61
+ "learning_rate": 0.00016679840122511857,
62
+ "loss": 0.3791,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.3,
67
+ "learning_rate": 0.00015957835677106406,
68
+ "loss": 0.3706,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.33,
73
+ "learning_rate": 0.00015184055881362684,
74
+ "loss": 0.3785,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.36,
79
+ "learning_rate": 0.00014365225110067207,
80
+ "loss": 0.3701,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.39,
85
+ "learning_rate": 0.00013508459244279678,
86
+ "loss": 0.3733,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.42,
91
+ "learning_rate": 0.00012621203832226526,
92
+ "loss": 0.3713,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 0.45,
97
+ "learning_rate": 0.00011711169385289445,
98
+ "loss": 0.3731,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 0.47,
103
+ "learning_rate": 0.00010786264371385917,
104
+ "loss": 0.377,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 0.5,
109
+ "learning_rate": 9.854526488049042e-05,
110
+ "loss": 0.3731,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 0.53,
115
+ "learning_rate": 8.924052812463844e-05,
116
+ "loss": 0.3762,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 0.56,
121
+ "learning_rate": 8.002929435476878e-05,
122
+ "loss": 0.3777,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 0.59,
127
+ "learning_rate": 7.099161191080386e-05,
128
+ "loss": 0.3699,
129
+ "step": 10000
130
+ },
131
+ {
132
+ "epoch": 0.62,
133
+ "learning_rate": 6.220602092042465e-05,
134
+ "loss": 0.3778,
135
+ "step": 10500
136
+ },
137
+ {
138
+ "epoch": 0.65,
139
+ "learning_rate": 5.3748870762182066e-05,
140
+ "loss": 0.371,
141
+ "step": 11000
142
+ },
143
+ {
144
+ "epoch": 0.68,
145
+ "learning_rate": 4.5693656566864785e-05,
146
+ "loss": 0.3747,
147
+ "step": 11500
148
+ },
149
+ {
150
+ "epoch": 0.71,
151
+ "learning_rate": 3.81103805231225e-05,
152
+ "loss": 0.3703,
153
+ "step": 12000
154
+ },
155
+ {
156
+ "epoch": 0.74,
157
+ "learning_rate": 3.1064943537786984e-05,
158
+ "loss": 0.3738,
159
+ "step": 12500
160
+ },
161
+ {
162
+ "epoch": 0.77,
163
+ "learning_rate": 2.4618572537543038e-05,
164
+ "loss": 0.3739,
165
+ "step": 13000
166
+ },
167
+ {
168
+ "epoch": 0.8,
169
+ "learning_rate": 1.882728838886583e-05,
170
+ "loss": 0.372,
171
+ "step": 13500
172
+ },
173
+ {
174
+ "epoch": 0.83,
175
+ "learning_rate": 1.3741419060158056e-05,
176
+ "loss": 0.3716,
177
+ "step": 14000
178
+ },
179
+ {
180
+ "epoch": 0.86,
181
+ "learning_rate": 9.405162256851662e-06,
182
+ "loss": 0.3698,
183
+ "step": 14500
184
+ },
185
+ {
186
+ "epoch": 0.89,
187
+ "learning_rate": 5.8562013303037124e-06,
188
+ "loss": 0.3743,
189
+ "step": 15000
190
+ },
191
+ {
192
+ "epoch": 0.92,
193
+ "learning_rate": 3.1253777983517363e-06,
194
+ "loss": 0.376,
195
+ "step": 15500
196
+ },
197
+ {
198
+ "epoch": 0.95,
199
+ "learning_rate": 1.236423323421776e-06,
200
+ "loss": 0.3746,
201
+ "step": 16000
202
+ },
203
+ {
204
+ "epoch": 0.98,
205
+ "learning_rate": 2.0575347737803452e-07,
206
+ "loss": 0.3738,
207
+ "step": 16500
208
  }
209
  ],
210
  "logging_steps": 500,
211
+ "max_steps": 16844,
212
  "num_train_epochs": 1,
213
  "save_steps": 500,
214
+ "total_flos": 3.457848488949235e+17,
215
  "trial_name": null,
216
  "trial_params": null
217
  }