guyhadad01 committed on
Commit
6370814
·
verified ·
1 Parent(s): 29eb288

Training in progress, step 25600, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1531,6 +1531,14 @@ You can finetune this model on your own dataset.
1531
  | 0.4435 | 25100 | 0.3773 |
1532
  | 0.4444 | 25150 | 0.3372 |
1533
  | 0.4453 | 25200 | 0.3178 |
 
 
 
 
 
 
 
 
1534
 
1535
  </details>
1536
 
 
1531
  | 0.4435 | 25100 | 0.3773 |
1532
  | 0.4444 | 25150 | 0.3372 |
1533
  | 0.4453 | 25200 | 0.3178 |
1534
+ | 0.4462 | 25250 | 0.2745 |
1535
+ | 0.4471 | 25300 | 0.2773 |
1536
+ | 0.4480 | 25350 | 0.3822 |
1537
+ | 0.4488 | 25400 | 0.3851 |
1538
+ | 0.4497 | 25450 | 0.3805 |
1539
+ | 0.4506 | 25500 | 0.3245 |
1540
+ | 0.4515 | 25550 | 0.2978 |
1541
+ | 0.4524 | 25600 | 0.3397 |
1542
 
1543
  </details>
1544
 
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:841fdf3c58c8f29d1dbc4b6aa518d6d4f0e4c702d0d79eae99a7a6f05440afb8
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b4c41f05edc2d7e0e5b6aa96c7280a269723970b3d9f5db97a908d698c18e46
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d042b648040ac616e95ada7c18231cfcd360f4f2b7dea533dbe8440f0bfa84b
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ad0f634d22e8d985415b64b93836e5380971e245fef96c6b35c06a87043b680
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3ad48efbb9bf93a84390a5aea5643acbd41bf262a8aa17ab3278f0f314a581a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3d11ea4dc06732960ed6abb1d44f0f58d98e38680260f859b6e949eb007dff1
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8f8fa593fe1292958817d4226b917c242a5dd7ed49104de560771ecc5cb6968
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42f87f293944d5e456973842209f35aaec89127a12621bc86fb62b420000afe5
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8139c38e3bba457a10055977964d7aab9920cd7bb0ccf9d0c0ca174f5b19226
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0157ec5a2cd87b323072c019eeb626da0770a47c34b4436db6020558b163b6ea
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.44530048947712536,
6
  "eval_steps": 500,
7
- "global_step": 25200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3536,6 +3536,62 @@
3536
  "learning_rate": 3.082601951660089e-05,
3537
  "loss": 0.3178,
3538
  "step": 25200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3539
  }
3540
  ],
3541
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4523687512148575,
6
  "eval_steps": 500,
7
+ "global_step": 25600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3536
  "learning_rate": 3.082601951660089e-05,
3537
  "loss": 0.3178,
3538
  "step": 25200
3539
+ },
3540
+ {
3541
+ "epoch": 0.44618402219434183,
3542
+ "grad_norm": 1.6131466627120972,
3543
+ "learning_rate": 3.077693349826236e-05,
3544
+ "loss": 0.2745,
3545
+ "step": 25250
3546
+ },
3547
+ {
3548
+ "epoch": 0.44706755491155836,
3549
+ "grad_norm": 1.5419201850891113,
3550
+ "learning_rate": 3.072784747992382e-05,
3551
+ "loss": 0.2773,
3552
+ "step": 25300
3553
+ },
3554
+ {
3555
+ "epoch": 0.4479510876287749,
3556
+ "grad_norm": 1.6418931484222412,
3557
+ "learning_rate": 3.067876146158528e-05,
3558
+ "loss": 0.3822,
3559
+ "step": 25350
3560
+ },
3561
+ {
3562
+ "epoch": 0.4488346203459914,
3563
+ "grad_norm": 1.288121223449707,
3564
+ "learning_rate": 3.0629675443246745e-05,
3565
+ "loss": 0.3851,
3566
+ "step": 25400
3567
+ },
3568
+ {
3569
+ "epoch": 0.44971815306320795,
3570
+ "grad_norm": 1.9523035287857056,
3571
+ "learning_rate": 3.058058942490821e-05,
3572
+ "loss": 0.3805,
3573
+ "step": 25450
3574
+ },
3575
+ {
3576
+ "epoch": 0.4506016857804245,
3577
+ "grad_norm": 3.3735404014587402,
3578
+ "learning_rate": 3.0531503406569674e-05,
3579
+ "loss": 0.3245,
3580
+ "step": 25500
3581
+ },
3582
+ {
3583
+ "epoch": 0.45148521849764095,
3584
+ "grad_norm": 1.4013001918792725,
3585
+ "learning_rate": 3.048241738823114e-05,
3586
+ "loss": 0.2978,
3587
+ "step": 25550
3588
+ },
3589
+ {
3590
+ "epoch": 0.4523687512148575,
3591
+ "grad_norm": 1.9055225849151611,
3592
+ "learning_rate": 3.0433331369892604e-05,
3593
+ "loss": 0.3397,
3594
+ "step": 25600
3595
  }
3596
  ],
3597
  "logging_steps": 50,