robertou2 committed on
Commit
fb43fef
·
verified ·
1 Parent(s): 15078df

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ba8e8c80803d2a11a70df9d4dacb77457eed010f3898a3e490266b2b1d0f304
3
  size 119801528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7f65be280500f9efd4e7fcf8f26e2b825a078f269733bf309a52f06a7ab655e
3
  size 119801528
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e175f62ebf7fe247327e70639ff1cd0a6113e616d2b57e42eb7b074ff5565e78
3
  size 239893323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d20cbc270f68359e1c4dcbcf2bbd43789852552e43bdf73ce0e1f01ff8e03f5
3
  size 239893323
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d3cd19daaf428db09a275ce94ac3f4a37a8f517f6ec3f0aebca352aaf3b9190
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bb492c21af1e402f8bd2306bdfbc3ea5270d01dcc213399de0f30d6c4d5b284
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3d5af38a40ff70252c3be98535c6d89a09a82aa37dfb5a2e298d6edde343dca
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba534ce755aa4b07bf85a12cd1cb3f87d8540ac06b99529635659d02895264e
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.121212121212121,
6
  "eval_steps": 500,
7
- "global_step": 28,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -204,6 +204,118 @@
204
  "learning_rate": 0.00032725424859373687,
205
  "loss": 0.5318,
206
  "step": 28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  }
208
  ],
209
  "logging_steps": 1,
@@ -223,7 +335,7 @@
223
  "attributes": {}
224
  }
225
  },
226
- "total_flos": 1418923874893824.0,
227
  "train_batch_size": 2,
228
  "trial_name": null,
229
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 4.96969696969697,
6
  "eval_steps": 500,
7
+ "global_step": 44,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
204
  "learning_rate": 0.00032725424859373687,
205
  "loss": 0.5318,
206
  "step": 28
207
+ },
208
+ {
209
+ "epoch": 3.242424242424242,
210
+ "grad_norm": 0.8166446089744568,
211
+ "learning_rate": 0.0003019779227044398,
212
+ "loss": 0.9006,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 3.3636363636363638,
217
+ "grad_norm": 1.2094213962554932,
218
+ "learning_rate": 0.0002761321158169134,
219
+ "loss": 0.6391,
220
+ "step": 30
221
+ },
222
+ {
223
+ "epoch": 3.484848484848485,
224
+ "grad_norm": 0.9135984778404236,
225
+ "learning_rate": 0.00025,
226
+ "loss": 0.6285,
227
+ "step": 31
228
+ },
229
+ {
230
+ "epoch": 3.606060606060606,
231
+ "grad_norm": 0.9478852152824402,
232
+ "learning_rate": 0.00022386788418308668,
233
+ "loss": 0.6269,
234
+ "step": 32
235
+ },
236
+ {
237
+ "epoch": 3.7272727272727275,
238
+ "grad_norm": 0.5533197522163391,
239
+ "learning_rate": 0.0001980220772955602,
240
+ "loss": 0.5646,
241
+ "step": 33
242
+ },
243
+ {
244
+ "epoch": 3.8484848484848486,
245
+ "grad_norm": 1.0226417779922485,
246
+ "learning_rate": 0.00017274575140626317,
247
+ "loss": 0.5521,
248
+ "step": 34
249
+ },
250
+ {
251
+ "epoch": 3.9696969696969697,
252
+ "grad_norm": 1.2138278484344482,
253
+ "learning_rate": 0.00014831583923105,
254
+ "loss": 0.8734,
255
+ "step": 35
256
+ },
257
+ {
258
+ "epoch": 4.0,
259
+ "grad_norm": 2.8926355838775635,
260
+ "learning_rate": 0.00012500000000000006,
261
+ "loss": 0.8668,
262
+ "step": 36
263
+ },
264
+ {
265
+ "epoch": 4.121212121212121,
266
+ "grad_norm": 0.9145299792289734,
267
+ "learning_rate": 0.00010305368692688174,
268
+ "loss": 0.2851,
269
+ "step": 37
270
+ },
271
+ {
272
+ "epoch": 4.242424242424242,
273
+ "grad_norm": 0.7148826718330383,
274
+ "learning_rate": 8.271734841028553e-05,
275
+ "loss": 0.6555,
276
+ "step": 38
277
+ },
278
+ {
279
+ "epoch": 4.363636363636363,
280
+ "grad_norm": 1.015910267829895,
281
+ "learning_rate": 6.421379363065141e-05,
282
+ "loss": 0.3664,
283
+ "step": 39
284
+ },
285
+ {
286
+ "epoch": 4.484848484848484,
287
+ "grad_norm": 0.9201410412788391,
288
+ "learning_rate": 4.7745751406263163e-05,
289
+ "loss": 0.575,
290
+ "step": 40
291
+ },
292
+ {
293
+ "epoch": 4.606060606060606,
294
+ "grad_norm": 0.8212230801582336,
295
+ "learning_rate": 3.3493649053890325e-05,
296
+ "loss": 0.4084,
297
+ "step": 41
298
+ },
299
+ {
300
+ "epoch": 4.7272727272727275,
301
+ "grad_norm": 0.8163782358169556,
302
+ "learning_rate": 2.1613635589349755e-05,
303
+ "loss": 0.3754,
304
+ "step": 42
305
+ },
306
+ {
307
+ "epoch": 4.848484848484849,
308
+ "grad_norm": 0.7215772867202759,
309
+ "learning_rate": 1.2235870926211617e-05,
310
+ "loss": 0.2209,
311
+ "step": 43
312
+ },
313
+ {
314
+ "epoch": 4.96969696969697,
315
+ "grad_norm": 1.07026207447052,
316
+ "learning_rate": 5.463099816548578e-06,
317
+ "loss": 0.3138,
318
+ "step": 44
319
  }
320
  ],
321
  "logging_steps": 1,
 
335
  "attributes": {}
336
  }
337
  },
338
+ "total_flos": 2267168340344832.0,
339
  "train_batch_size": 2,
340
  "trial_name": null,
341
  "trial_params": null