EbrahemHesham commited on
Commit
c5989a3
·
verified ·
1 Parent(s): 1602c96

Training in progress, step 270, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2b68f939a1ac16287167cfe01619979f639ace315e759fe243def56bc7e709a
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ebe99d1ad4c258f1237f57aef6685beeb79122cdc7802ac0bacc3a47816f656
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c25e2c917453589390f69ad73c1e25677e6004a421b6e103961137e3994126f
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0caf5f99eabea4f6ea151c88df39573a4bdb60aba4df360468a0e44f174ae47b
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3e5d946241df2516b06d7074d8779088eae7607173ad780df56583910a9589b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f994073fa65bd3eeec886197c2259b5a6406cdb6b5ea5f198b369e2b78371547
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca50632b9dcfeaf56f29cc41af869dfc765fe5c731289691cb32c1dd52ebe96
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 7.411214953271028,
6
  "eval_steps": 100,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -304,6 +304,104 @@
304
  "eval_samples_per_second": 9.296,
305
  "eval_steps_per_second": 9.296,
306
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  }
308
  ],
309
  "logging_steps": 5,
@@ -318,12 +416,12 @@
318
  "should_evaluate": false,
319
  "should_log": false,
320
  "should_save": true,
321
- "should_training_stop": false
322
  },
323
  "attributes": {}
324
  }
325
  },
326
- "total_flos": 5.452252790312141e+16,
327
  "train_batch_size": 16,
328
  "trial_name": null,
329
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 10.0,
6
  "eval_steps": 100,
7
+ "global_step": 270,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
304
  "eval_samples_per_second": 9.296,
305
  "eval_steps_per_second": 9.296,
306
  "step": 200
307
+ },
308
+ {
309
+ "epoch": 7.598130841121495,
310
+ "grad_norm": 0.2390548288822174,
311
+ "learning_rate": 1.7123931571546827e-05,
312
+ "loss": 0.0116,
313
+ "step": 205
314
+ },
315
+ {
316
+ "epoch": 7.785046728971962,
317
+ "grad_norm": 0.1910872757434845,
318
+ "learning_rate": 1.4759117090312197e-05,
319
+ "loss": 0.0122,
320
+ "step": 210
321
+ },
322
+ {
323
+ "epoch": 7.97196261682243,
324
+ "grad_norm": 0.18526358902454376,
325
+ "learning_rate": 1.25415076745532e-05,
326
+ "loss": 0.0128,
327
+ "step": 215
328
+ },
329
+ {
330
+ "epoch": 8.149532710280374,
331
+ "grad_norm": 0.14037036895751953,
332
+ "learning_rate": 1.0480366524062042e-05,
333
+ "loss": 0.0104,
334
+ "step": 220
335
+ },
336
+ {
337
+ "epoch": 8.336448598130842,
338
+ "grad_norm": 0.1647230088710785,
339
+ "learning_rate": 8.584303253381847e-06,
340
+ "loss": 0.0096,
341
+ "step": 225
342
+ },
343
+ {
344
+ "epoch": 8.523364485981308,
345
+ "grad_norm": 0.12098614126443863,
346
+ "learning_rate": 6.861237928494579e-06,
347
+ "loss": 0.0087,
348
+ "step": 230
349
+ },
350
+ {
351
+ "epoch": 8.710280373831775,
352
+ "grad_norm": 0.1445273905992508,
353
+ "learning_rate": 5.318367983829392e-06,
354
+ "loss": 0.0091,
355
+ "step": 235
356
+ },
357
+ {
358
+ "epoch": 8.897196261682243,
359
+ "grad_norm": 0.14538030326366425,
360
+ "learning_rate": 3.962138157783085e-06,
361
+ "loss": 0.0093,
362
+ "step": 240
363
+ },
364
+ {
365
+ "epoch": 9.074766355140186,
366
+ "grad_norm": 0.11064188182353973,
367
+ "learning_rate": 2.798213572335001e-06,
368
+ "loss": 0.0082,
369
+ "step": 245
370
+ },
371
+ {
372
+ "epoch": 9.261682242990654,
373
+ "grad_norm": 0.12358280271291733,
374
+ "learning_rate": 1.8314560692059835e-06,
375
+ "loss": 0.0088,
376
+ "step": 250
377
+ },
378
+ {
379
+ "epoch": 9.448598130841122,
380
+ "grad_norm": 0.11256805807352066,
381
+ "learning_rate": 1.0659039014077944e-06,
382
+ "loss": 0.0078,
383
+ "step": 255
384
+ },
385
+ {
386
+ "epoch": 9.63551401869159,
387
+ "grad_norm": 0.13209514319896698,
388
+ "learning_rate": 5.047548650136513e-07,
389
+ "loss": 0.0093,
390
+ "step": 260
391
+ },
392
+ {
393
+ "epoch": 9.822429906542055,
394
+ "grad_norm": 0.11688179522752762,
395
+ "learning_rate": 1.503529416103988e-07,
396
+ "loss": 0.0082,
397
+ "step": 265
398
+ },
399
+ {
400
+ "epoch": 10.0,
401
+ "grad_norm": 0.1742771714925766,
402
+ "learning_rate": 4.178507228136397e-09,
403
+ "loss": 0.0076,
404
+ "step": 270
405
  }
406
  ],
407
  "logging_steps": 5,
 
416
  "should_evaluate": false,
417
  "should_log": false,
418
  "should_save": true,
419
+ "should_training_stop": true
420
  },
421
  "attributes": {}
422
  }
423
  },
424
+ "total_flos": 7.343746636637798e+16,
425
  "train_batch_size": 16,
426
  "trial_name": null,
427
  "trial_params": null