EbrahemHesham commited on
Commit
a08e226
·
verified ·
1 Parent(s): 152d685

Training in progress, step 270, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ecb69f42c4102287be171762bb98640bcc97b4631086383d36dcfa0f5d1a677
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae3b1fa8c31aabbe9ab45a20bd05cb09885864f566e475e30f433820052b4171
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58b4a023d7f908cb321232429392e5505fde05980c4e0083630c2ac62ba19284
3
  size 591203178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fdf8c6ddfc5f151321a3c63a95bb87e96c0c51829e1a421ff772c199002a38a
3
  size 591203178
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3e5d946241df2516b06d7074d8779088eae7607173ad780df56583910a9589b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f994073fa65bd3eeec886197c2259b5a6406cdb6b5ea5f198b369e2b78371547
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca50632b9dcfeaf56f29cc41af869dfc765fe5c731289691cb32c1dd52ebe96
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 7.411214953271028,
6
  "eval_steps": 100,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -304,6 +304,104 @@
304
  "eval_samples_per_second": 11.956,
305
  "eval_steps_per_second": 11.956,
306
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  }
308
  ],
309
  "logging_steps": 5,
@@ -318,12 +416,12 @@
318
  "should_evaluate": false,
319
  "should_log": false,
320
  "should_save": true,
321
- "should_training_stop": false
322
  },
323
  "attributes": {}
324
  }
325
  },
326
- "total_flos": 2.607353817710592e+16,
327
  "train_batch_size": 16,
328
  "trial_name": null,
329
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 10.0,
6
  "eval_steps": 100,
7
+ "global_step": 270,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
304
  "eval_samples_per_second": 11.956,
305
  "eval_steps_per_second": 11.956,
306
  "step": 200
307
+ },
308
+ {
309
+ "epoch": 7.598130841121495,
310
+ "grad_norm": 0.2857516407966614,
311
+ "learning_rate": 1.7123931571546827e-05,
312
+ "loss": 0.0151,
313
+ "step": 205
314
+ },
315
+ {
316
+ "epoch": 7.785046728971962,
317
+ "grad_norm": 0.22625453770160675,
318
+ "learning_rate": 1.4759117090312197e-05,
319
+ "loss": 0.0156,
320
+ "step": 210
321
+ },
322
+ {
323
+ "epoch": 7.97196261682243,
324
+ "grad_norm": 0.21999120712280273,
325
+ "learning_rate": 1.25415076745532e-05,
326
+ "loss": 0.0151,
327
+ "step": 215
328
+ },
329
+ {
330
+ "epoch": 8.149532710280374,
331
+ "grad_norm": 0.16067098081111908,
332
+ "learning_rate": 1.0480366524062042e-05,
333
+ "loss": 0.0133,
334
+ "step": 220
335
+ },
336
+ {
337
+ "epoch": 8.336448598130842,
338
+ "grad_norm": 0.18020516633987427,
339
+ "learning_rate": 8.584303253381847e-06,
340
+ "loss": 0.0122,
341
+ "step": 225
342
+ },
343
+ {
344
+ "epoch": 8.523364485981308,
345
+ "grad_norm": 0.16721676290035248,
346
+ "learning_rate": 6.861237928494579e-06,
347
+ "loss": 0.012,
348
+ "step": 230
349
+ },
350
+ {
351
+ "epoch": 8.710280373831775,
352
+ "grad_norm": 0.18559329211711884,
353
+ "learning_rate": 5.318367983829392e-06,
354
+ "loss": 0.0121,
355
+ "step": 235
356
+ },
357
+ {
358
+ "epoch": 8.897196261682243,
359
+ "grad_norm": 0.1803818792104721,
360
+ "learning_rate": 3.962138157783085e-06,
361
+ "loss": 0.0126,
362
+ "step": 240
363
+ },
364
+ {
365
+ "epoch": 9.074766355140186,
366
+ "grad_norm": 0.15274249017238617,
367
+ "learning_rate": 2.798213572335001e-06,
368
+ "loss": 0.0113,
369
+ "step": 245
370
+ },
371
+ {
372
+ "epoch": 9.261682242990654,
373
+ "grad_norm": 0.15818247199058533,
374
+ "learning_rate": 1.8314560692059835e-06,
375
+ "loss": 0.0112,
376
+ "step": 250
377
+ },
378
+ {
379
+ "epoch": 9.448598130841122,
380
+ "grad_norm": 0.14977800846099854,
381
+ "learning_rate": 1.0659039014077944e-06,
382
+ "loss": 0.0105,
383
+ "step": 255
384
+ },
385
+ {
386
+ "epoch": 9.63551401869159,
387
+ "grad_norm": 0.16695314645767212,
388
+ "learning_rate": 5.047548650136513e-07,
389
+ "loss": 0.0122,
390
+ "step": 260
391
+ },
392
+ {
393
+ "epoch": 9.822429906542055,
394
+ "grad_norm": 0.14471110701560974,
395
+ "learning_rate": 1.503529416103988e-07,
396
+ "loss": 0.0111,
397
+ "step": 265
398
+ },
399
+ {
400
+ "epoch": 10.0,
401
+ "grad_norm": 0.23003625869750977,
402
+ "learning_rate": 4.178507228136397e-09,
403
+ "loss": 0.0103,
404
+ "step": 270
405
  }
406
  ],
407
  "logging_steps": 5,
 
416
  "should_evaluate": false,
417
  "should_log": false,
418
  "should_save": true,
419
+ "should_training_stop": true
420
  },
421
  "attributes": {}
422
  }
423
  },
424
+ "total_flos": 3.511896195158016e+16,
425
  "train_batch_size": 16,
426
  "trial_name": null,
427
  "trial_params": null