superklein18 committed on
Commit
0caf718
·
verified ·
1 Parent(s): 5829b3a

superklein18/test

Browse files
adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "k_proj",
27
- "gate_proj",
28
- "down_proj",
29
  "o_proj",
30
- "up_proj",
 
31
  "q_proj",
32
- "v_proj"
 
 
33
  ],
34
  "task_type": null,
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
26
  "o_proj",
27
+ "v_proj",
28
+ "down_proj",
29
  "q_proj",
30
+ "k_proj",
31
+ "up_proj",
32
+ "gate_proj"
33
  ],
34
  "task_type": null,
35
  "use_dora": false,
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.1567674662395578,
5
- "train_runtime": 349.2812,
6
- "train_samples_per_second": 4.272,
7
- "train_steps_per_second": 1.068
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 3.408519426981608,
5
+ "train_runtime": 5.3591,
6
+ "train_samples_per_second": 1.679,
7
+ "train_steps_per_second": 0.56
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52685fab0e3fe31ab3d22be9347155dc8a4164ca4ea95785a467cf7549976873
3
  size 3554214752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cf94a1af6dc1e79b55b255947e1eb03fd547da6ee4081eb9bdfe650a310d67e
3
  size 3554214752
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.1567674662395578,
5
- "train_runtime": 349.2812,
6
- "train_samples_per_second": 4.272,
7
- "train_steps_per_second": 1.068
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 3.408519426981608,
5
+ "train_runtime": 5.3591,
6
+ "train_samples_per_second": 1.679,
7
+ "train_steps_per_second": 0.56
8
  }
trainer_state.json CHANGED
@@ -3,296 +3,30 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 373,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.02680965147453083,
13
- "grad_norm": 0.08274202048778534,
14
- "learning_rate": 9.731903485254692e-05,
15
- "loss": 5.8472,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.05361930294906166,
20
- "grad_norm": 0.03481147810816765,
21
- "learning_rate": 9.463806970509384e-05,
22
- "loss": 0.0001,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.08042895442359249,
27
- "grad_norm": 0.03238165006041527,
28
- "learning_rate": 9.195710455764074e-05,
29
- "loss": 0.0,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.10723860589812333,
34
- "grad_norm": 0.030895311385393143,
35
- "learning_rate": 8.927613941018768e-05,
36
- "loss": 0.0,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.13404825737265416,
41
- "grad_norm": 0.029906680807471275,
42
- "learning_rate": 8.65951742627346e-05,
43
- "loss": 0.0,
44
- "step": 50
45
- },
46
- {
47
- "epoch": 0.16085790884718498,
48
- "grad_norm": 0.029279008507728577,
49
- "learning_rate": 8.391420911528151e-05,
50
- "loss": 0.0,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.1876675603217158,
55
- "grad_norm": 0.028912600129842758,
56
- "learning_rate": 8.123324396782842e-05,
57
- "loss": 0.0,
58
- "step": 70
59
- },
60
- {
61
- "epoch": 0.1876675603217158,
62
- "eval_runtime": 27.496,
63
- "eval_samples_per_second": 13.566,
64
- "eval_steps_per_second": 0.873,
65
- "step": 70
66
- },
67
- {
68
- "epoch": 0.21447721179624665,
69
- "grad_norm": 0.028546025976538658,
70
- "learning_rate": 7.855227882037533e-05,
71
- "loss": 0.0,
72
- "step": 80
73
- },
74
- {
75
- "epoch": 0.24128686327077747,
76
- "grad_norm": 0.028238529339432716,
77
- "learning_rate": 7.587131367292225e-05,
78
- "loss": 0.0,
79
- "step": 90
80
- },
81
- {
82
- "epoch": 0.2680965147453083,
83
- "grad_norm": 0.02808106318116188,
84
- "learning_rate": 7.319034852546918e-05,
85
- "loss": 0.0,
86
- "step": 100
87
- },
88
- {
89
- "epoch": 0.2949061662198391,
90
- "grad_norm": 0.02785572223365307,
91
- "learning_rate": 7.050938337801609e-05,
92
- "loss": 0.0,
93
- "step": 110
94
- },
95
- {
96
- "epoch": 0.32171581769436997,
97
- "grad_norm": 0.027657197788357735,
98
- "learning_rate": 6.7828418230563e-05,
99
- "loss": 0.0,
100
- "step": 120
101
- },
102
- {
103
- "epoch": 0.3485254691689008,
104
- "grad_norm": 0.027470039203763008,
105
- "learning_rate": 6.514745308310992e-05,
106
- "loss": 0.0,
107
- "step": 130
108
- },
109
- {
110
- "epoch": 0.3753351206434316,
111
- "grad_norm": 0.027397796511650085,
112
- "learning_rate": 6.246648793565684e-05,
113
- "loss": 0.0,
114
- "step": 140
115
- },
116
- {
117
- "epoch": 0.40214477211796246,
118
- "grad_norm": 0.027218900620937347,
119
- "learning_rate": 5.978552278820375e-05,
120
- "loss": 0.0,
121
- "step": 150
122
- },
123
- {
124
- "epoch": 0.4289544235924933,
125
- "grad_norm": 0.027058888226747513,
126
- "learning_rate": 5.7104557640750675e-05,
127
- "loss": 0.0,
128
- "step": 160
129
- },
130
- {
131
- "epoch": 0.45576407506702415,
132
- "grad_norm": 0.02702343836426735,
133
- "learning_rate": 5.442359249329759e-05,
134
- "loss": 0.0,
135
- "step": 170
136
- },
137
- {
138
- "epoch": 0.48257372654155495,
139
- "grad_norm": 0.026990406215190887,
140
- "learning_rate": 5.174262734584451e-05,
141
- "loss": 0.0,
142
- "step": 180
143
- },
144
- {
145
- "epoch": 0.5093833780160858,
146
- "grad_norm": 0.026836372911930084,
147
- "learning_rate": 4.906166219839142e-05,
148
- "loss": 0.0,
149
- "step": 190
150
- },
151
- {
152
- "epoch": 0.5361930294906166,
153
- "grad_norm": 0.026813047006726265,
154
- "learning_rate": 4.638069705093834e-05,
155
- "loss": 0.0,
156
- "step": 200
157
- },
158
- {
159
- "epoch": 0.5630026809651475,
160
- "grad_norm": 0.02667263336479664,
161
- "learning_rate": 4.3699731903485256e-05,
162
- "loss": 0.0,
163
- "step": 210
164
- },
165
- {
166
- "epoch": 0.5898123324396782,
167
- "grad_norm": 0.026654915884137154,
168
- "learning_rate": 4.1018766756032174e-05,
169
- "loss": 0.0,
170
- "step": 220
171
- },
172
- {
173
- "epoch": 0.6166219839142091,
174
- "grad_norm": 0.02663583494722843,
175
- "learning_rate": 3.8337801608579085e-05,
176
- "loss": 0.0,
177
- "step": 230
178
- },
179
- {
180
- "epoch": 0.6434316353887399,
181
- "grad_norm": 0.02650284953415394,
182
- "learning_rate": 3.565683646112601e-05,
183
- "loss": 0.0,
184
- "step": 240
185
- },
186
- {
187
- "epoch": 0.6702412868632708,
188
- "grad_norm": 0.026488320901989937,
189
- "learning_rate": 3.297587131367292e-05,
190
- "loss": 0.0,
191
- "step": 250
192
- },
193
- {
194
- "epoch": 0.6970509383378016,
195
- "grad_norm": 0.02647605538368225,
196
- "learning_rate": 3.0294906166219838e-05,
197
- "loss": 0.0,
198
- "step": 260
199
- },
200
- {
201
- "epoch": 0.7238605898123325,
202
- "grad_norm": 0.026349999010562897,
203
- "learning_rate": 2.761394101876676e-05,
204
- "loss": 0.0,
205
- "step": 270
206
- },
207
- {
208
- "epoch": 0.7506702412868632,
209
- "grad_norm": 0.026226293295621872,
210
- "learning_rate": 2.4932975871313673e-05,
211
- "loss": 0.0,
212
- "step": 280
213
- },
214
- {
215
- "epoch": 0.7774798927613941,
216
- "grad_norm": 0.026215018704533577,
217
- "learning_rate": 2.225201072386059e-05,
218
- "loss": 0.0,
219
- "step": 290
220
- },
221
- {
222
- "epoch": 0.8042895442359249,
223
- "grad_norm": 0.02620949037373066,
224
- "learning_rate": 1.9571045576407505e-05,
225
- "loss": 0.0,
226
- "step": 300
227
- },
228
- {
229
- "epoch": 0.8310991957104558,
230
- "grad_norm": 0.02621125429868698,
231
- "learning_rate": 1.6890080428954423e-05,
232
- "loss": 0.0,
233
- "step": 310
234
- },
235
- {
236
- "epoch": 0.8579088471849866,
237
- "grad_norm": 0.026199743151664734,
238
- "learning_rate": 1.4209115281501343e-05,
239
- "loss": 0.0,
240
- "step": 320
241
- },
242
- {
243
- "epoch": 0.8847184986595175,
244
- "grad_norm": 0.026200657710433006,
245
- "learning_rate": 1.1528150134048257e-05,
246
- "loss": 0.0,
247
- "step": 330
248
- },
249
- {
250
- "epoch": 0.9115281501340483,
251
- "grad_norm": 0.02620103396475315,
252
- "learning_rate": 8.847184986595175e-06,
253
- "loss": 0.0,
254
- "step": 340
255
- },
256
- {
257
- "epoch": 0.938337801608579,
258
- "grad_norm": 0.026197846978902817,
259
- "learning_rate": 6.1662198391420915e-06,
260
- "loss": 0.0,
261
- "step": 350
262
- },
263
- {
264
- "epoch": 0.9651474530831099,
265
- "grad_norm": 0.026203904300928116,
266
- "learning_rate": 3.4852546916890083e-06,
267
- "loss": 0.0,
268
- "step": 360
269
- },
270
- {
271
- "epoch": 0.9919571045576407,
272
- "grad_norm": 0.0262033361941576,
273
- "learning_rate": 8.04289544235925e-07,
274
- "loss": 0.0,
275
- "step": 370
276
- },
277
  {
278
  "epoch": 1.0,
279
- "eval_runtime": 26.0963,
280
- "eval_samples_per_second": 14.293,
281
- "eval_steps_per_second": 0.92,
282
- "step": 373
283
  },
284
  {
285
  "epoch": 1.0,
286
- "step": 373,
287
  "total_flos": 0.0,
288
- "train_loss": 0.1567674662395578,
289
- "train_runtime": 349.2812,
290
- "train_samples_per_second": 4.272,
291
- "train_steps_per_second": 1.068
292
  }
293
  ],
294
  "logging_steps": 10,
295
- "max_steps": 373,
296
  "num_input_tokens_seen": 0,
297
  "num_train_epochs": 1,
298
  "save_steps": 500,
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 3,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.0,
13
+ "eval_runtime": 0.2175,
14
+ "eval_samples_per_second": 13.792,
15
+ "eval_steps_per_second": 4.597,
16
+ "step": 3
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "step": 3,
21
  "total_flos": 0.0,
22
+ "train_loss": 3.408519426981608,
23
+ "train_runtime": 5.3591,
24
+ "train_samples_per_second": 1.679,
25
+ "train_steps_per_second": 0.56
26
  }
27
  ],
28
  "logging_steps": 10,
29
+ "max_steps": 3,
30
  "num_input_tokens_seen": 0,
31
  "num_train_epochs": 1,
32
  "save_steps": 500,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34c93f8087af42de0f66def80a14024461a78d1da002000e529291da561312d8
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ef475d08bd3cb77c7aaecf91a6a5fed0b205da33c7a82775209cb1361b04f9f
3
  size 5432