guyhadad01 commited on
Commit
8a6a9ad
·
verified ·
1 Parent(s): 4b3e6f5

Training in progress, step 46000, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1209,6 +1209,10 @@ You can finetune this model on your own dataset.
1209
  | 0.8075 | 45700 | 0.3975 |
1210
  | 0.8084 | 45750 | 0.391 |
1211
  | 0.8093 | 45800 | 0.3055 |
 
 
 
 
1212
 
1213
 
1214
  ### Framework Versions
 
1209
  | 0.8075 | 45700 | 0.3975 |
1210
  | 0.8084 | 45750 | 0.391 |
1211
  | 0.8093 | 45800 | 0.3055 |
1212
+ | 0.8102 | 45850 | 0.2434 |
1213
+ | 0.8111 | 45900 | 0.285 |
1214
+ | 0.8120 | 45950 | 0.3952 |
1215
+ | 0.8129 | 46000 | 0.2802 |
1216
 
1217
 
1218
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cd5b7a0c7bbd9cd3c1ef7b582da35f4528606a4ac86bb04de178a7045ffef85
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbc453c795079d8ded19817daacd8fec5d095bba6393f23049cff77eeb1abaf9
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:229981f79739c953722cfecdcfd1ea3bc38fb99e716343cc7272446d9e5bd67a
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303c03bcbf6290c5fb7fcf2c4927d56af5a210c091c12a9beba5ee6c9213f174
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0858441a43126cd8171b6b37146cb192258791649fae1dc1b48ff841ebace857
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:431f36421847b278b660e6526fb15af6b02fbddb625572cac02bb7ad994d2dda
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:791a23171b5f58ee5e75b5820d77158a2c7a8600431496a1df7bd6fb2c50e26f
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89e42fd89832885ab30327a4c2371265408743318772a17d94083f8bfb054483
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb572362d72f52e385494a9e0afaacf81d8323c35f20a7a978751625112b3a58
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4d32ff62b246761d94af48903691044aacc80825efb2f5658f28d83287222c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.809315968970331,
6
  "eval_steps": 500,
7
- "global_step": 45800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6420,6 +6420,34 @@
6420
  "learning_rate": 1.061141544442481e-05,
6421
  "loss": 0.3055,
6422
  "step": 45800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6423
  }
6424
  ],
6425
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8128500998391971,
6
  "eval_steps": 500,
7
+ "global_step": 46000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6420
  "learning_rate": 1.061141544442481e-05,
6421
  "loss": 0.3055,
6422
  "step": 45800
6423
+ },
6424
+ {
6425
+ "epoch": 0.8101995016875475,
6426
+ "grad_norm": 1.3673596382141113,
6427
+ "learning_rate": 1.0562329426086275e-05,
6428
+ "loss": 0.2434,
6429
+ "step": 45850
6430
+ },
6431
+ {
6432
+ "epoch": 0.811083034404764,
6433
+ "grad_norm": 2.5049281120300293,
6434
+ "learning_rate": 1.0513243407747738e-05,
6435
+ "loss": 0.285,
6436
+ "step": 45900
6437
+ },
6438
+ {
6439
+ "epoch": 0.8119665671219806,
6440
+ "grad_norm": 4.577225208282471,
6441
+ "learning_rate": 1.04641573894092e-05,
6442
+ "loss": 0.3952,
6443
+ "step": 45950
6444
+ },
6445
+ {
6446
+ "epoch": 0.8128500998391971,
6447
+ "grad_norm": 1.4778873920440674,
6448
+ "learning_rate": 1.0415071371070664e-05,
6449
+ "loss": 0.2802,
6450
+ "step": 46000
6451
  }
6452
  ],
6453
  "logging_steps": 50,