youssefedweqd committed on
Commit
7da5849
·
verified ·
1 Parent(s): b888545

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "q_proj",
27
  "v_proj",
28
- "k_proj",
29
- "up_proj",
30
  "down_proj",
31
- "gate_proj",
32
- "o_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "gate_proj",
27
  "q_proj",
28
  "v_proj",
29
+ "o_proj",
 
30
  "down_proj",
31
+ "k_proj",
32
+ "up_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:027c28cbacad0920c7a8ec1a4dbaf396f0658e37d9c57aa24903513cf568bf29
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6902c0fce949c015e61a7cdda21d0e5c6be08194c3d28ed5ba69f90c5450bcd9
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a811f08d635f9fd429d0ac8672eee899607dd871ece10f326b8ec3e7266d9db2
3
  size 323292202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c212e47a52e22fe8fe64b4cb3e89bc0da4e6416db71ad7a9e1fd98449fb852
3
  size 323292202
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:824d4a418ca52dbceab02ca3bdda11d00d54b246084fd87a75671a28233a0cb2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3aee0c07617101343e05feaf02d5053ded9a2c41e9667f836fb2f6a3de2e334
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.16556291390728478,
6
  "eval_steps": 100,
7
- "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -164,6 +164,146 @@
164
  "learning_rate": 5.513245033112583e-05,
165
  "loss": 0.7213,
166
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  }
168
  ],
169
  "logging_steps": 50,
@@ -183,7 +323,7 @@
183
  "attributes": {}
184
  }
185
  },
186
- "total_flos": 7833052747137024.0,
187
  "train_batch_size": 1,
188
  "trial_name": null,
189
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.33112582781456956,
6
  "eval_steps": 100,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
164
  "learning_rate": 5.513245033112583e-05,
165
  "loss": 0.7213,
166
  "step": 1000
167
+ },
168
+ {
169
+ "epoch": 0.173841059602649,
170
+ "grad_norm": 1.8289754390716553,
171
+ "learning_rate": 5.789183222958058e-05,
172
+ "loss": 0.7335,
173
+ "step": 1050
174
+ },
175
+ {
176
+ "epoch": 0.18211920529801323,
177
+ "grad_norm": 1.4989681243896484,
178
+ "learning_rate": 6.065121412803533e-05,
179
+ "loss": 0.7326,
180
+ "step": 1100
181
+ },
182
+ {
183
+ "epoch": 0.19039735099337748,
184
+ "grad_norm": 1.5326098203659058,
185
+ "learning_rate": 6.341059602649006e-05,
186
+ "loss": 0.7311,
187
+ "step": 1150
188
+ },
189
+ {
190
+ "epoch": 0.1986754966887417,
191
+ "grad_norm": 1.4897147417068481,
192
+ "learning_rate": 6.616997792494481e-05,
193
+ "loss": 0.6918,
194
+ "step": 1200
195
+ },
196
+ {
197
+ "epoch": 0.20695364238410596,
198
+ "grad_norm": 1.634765863418579,
199
+ "learning_rate": 6.892935982339957e-05,
200
+ "loss": 0.7051,
201
+ "step": 1250
202
+ },
203
+ {
204
+ "epoch": 0.2152317880794702,
205
+ "grad_norm": 1.4463587999343872,
206
+ "learning_rate": 7.168874172185431e-05,
207
+ "loss": 0.6955,
208
+ "step": 1300
209
+ },
210
+ {
211
+ "epoch": 0.22350993377483444,
212
+ "grad_norm": 1.632133960723877,
213
+ "learning_rate": 7.444812362030905e-05,
214
+ "loss": 0.6901,
215
+ "step": 1350
216
+ },
217
+ {
218
+ "epoch": 0.23178807947019867,
219
+ "grad_norm": 1.4062328338623047,
220
+ "learning_rate": 7.72075055187638e-05,
221
+ "loss": 0.6833,
222
+ "step": 1400
223
+ },
224
+ {
225
+ "epoch": 0.24006622516556292,
226
+ "grad_norm": 1.2914466857910156,
227
+ "learning_rate": 7.996688741721855e-05,
228
+ "loss": 0.6663,
229
+ "step": 1450
230
+ },
231
+ {
232
+ "epoch": 0.24834437086092714,
233
+ "grad_norm": 1.4995919466018677,
234
+ "learning_rate": 8.272626931567329e-05,
235
+ "loss": 0.6959,
236
+ "step": 1500
237
+ },
238
+ {
239
+ "epoch": 0.25662251655629137,
240
+ "grad_norm": 1.1299749612808228,
241
+ "learning_rate": 8.548565121412803e-05,
242
+ "loss": 0.6685,
243
+ "step": 1550
244
+ },
245
+ {
246
+ "epoch": 0.26490066225165565,
247
+ "grad_norm": 1.329004168510437,
248
+ "learning_rate": 8.824503311258279e-05,
249
+ "loss": 0.6678,
250
+ "step": 1600
251
+ },
252
+ {
253
+ "epoch": 0.2731788079470199,
254
+ "grad_norm": 1.5191948413848877,
255
+ "learning_rate": 9.100441501103754e-05,
256
+ "loss": 0.6731,
257
+ "step": 1650
258
+ },
259
+ {
260
+ "epoch": 0.2814569536423841,
261
+ "grad_norm": 1.739169716835022,
262
+ "learning_rate": 9.376379690949227e-05,
263
+ "loss": 0.6691,
264
+ "step": 1700
265
+ },
266
+ {
267
+ "epoch": 0.2897350993377483,
268
+ "grad_norm": 1.2906118631362915,
269
+ "learning_rate": 9.652317880794703e-05,
270
+ "loss": 0.6718,
271
+ "step": 1750
272
+ },
273
+ {
274
+ "epoch": 0.2980132450331126,
275
+ "grad_norm": 1.289502501487732,
276
+ "learning_rate": 9.928256070640178e-05,
277
+ "loss": 0.6581,
278
+ "step": 1800
279
+ },
280
+ {
281
+ "epoch": 0.30629139072847683,
282
+ "grad_norm": 1.3923128843307495,
283
+ "learning_rate": 9.999872989402833e-05,
284
+ "loss": 0.6589,
285
+ "step": 1850
286
+ },
287
+ {
288
+ "epoch": 0.31456953642384106,
289
+ "grad_norm": 1.1048816442489624,
290
+ "learning_rate": 9.999297790520483e-05,
291
+ "loss": 0.6341,
292
+ "step": 1900
293
+ },
294
+ {
295
+ "epoch": 0.3228476821192053,
296
+ "grad_norm": 1.3568603992462158,
297
+ "learning_rate": 9.998258777484084e-05,
298
+ "loss": 0.6318,
299
+ "step": 1950
300
+ },
301
+ {
302
+ "epoch": 0.33112582781456956,
303
+ "grad_norm": 0.923786997795105,
304
+ "learning_rate": 9.996756046688961e-05,
305
+ "loss": 0.6318,
306
+ "step": 2000
307
  }
308
  ],
309
  "logging_steps": 50,
 
323
  "attributes": {}
324
  }
325
  },
326
+ "total_flos": 1.5698763986239488e+16,
327
  "train_batch_size": 1,
328
  "trial_name": null,
329
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dd2ef96eff028fc6db83c8627ce2e789cafe652a25ea367c040819bc392f916
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2543e07a37d2c3de3cd8e1d682eb10ddfc7a8cf84209a331e0b0e44870af81c3
3
  size 5752