guyhadad01 commited on
Commit
9b8d6b9
·
verified ·
1 Parent(s): 98bd472

Training in progress, step 11000, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1241,6 +1241,10 @@ You can finetune this model on your own dataset.
1241
  | 0.1891 | 10700 | 0.3533 |
1242
  | 0.1900 | 10750 | 0.5213 |
1243
  | 0.1908 | 10800 | 0.4372 |
 
 
 
 
1244
 
1245
 
1246
  ### Framework Versions
 
1241
  | 0.1891 | 10700 | 0.3533 |
1242
  | 0.1900 | 10750 | 0.5213 |
1243
  | 0.1908 | 10800 | 0.4372 |
1244
+ | 0.1917 | 10850 | 0.3286 |
1245
+ | 0.1926 | 10900 | 0.4082 |
1246
+ | 0.1935 | 10950 | 0.4056 |
1247
+ | 0.1944 | 11000 | 0.4435 |
1248
 
1249
 
1250
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f408adec52fbb09d85249b52246fc558988060284c8e9e89545510e5f53f1fe
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68fa837444bb1d1983506586971bf36bcb05644b535c7a58278e0f70de2e98b7
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3a277a8a603891a188ecd4a7bff193d0e3832dc4f4e3f75b9504c7a244433a2
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e30cc5ca132903fd2a4ed91cad71091631f1cc2f4eba3197af287eeafbaf42
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b39af6a910c9139a42774120919755175edcea033ab599451e3e8ba8327efe56
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e22fe178097ffc288163bb2b208108bb4bcac4332048a6b0583e8f00c3efbf3d
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:618fd513d5c5e6b63ad8e193c3fcdd0eeefcf9ab6df7ca5c5fbc622a63a33f7d
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a0047c1f4849b7b6cea19c700df8596a7bc71c9ef39f6ce4cc6960374828c2
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fea756d925863317d55b7c25598d1b5b7a22bdd3a88f5ac4983f02069f2fe984
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd72024597b2c3bf8343bfe8c1006caf70bcb94b9f5552f878b8f2c8272ff940
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.190843066918768,
6
  "eval_steps": 500,
7
- "global_step": 10800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1520,6 +1520,34 @@
1520
  "learning_rate": 4.495493903516523e-05,
1521
  "loss": 0.4372,
1522
  "step": 10800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1523
  }
1524
  ],
1525
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.19437719778763407,
6
  "eval_steps": 500,
7
+ "global_step": 11000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1520
  "learning_rate": 4.495493903516523e-05,
1521
  "loss": 0.4372,
1522
  "step": 10800
1523
+ },
1524
+ {
1525
+ "epoch": 0.1917265996359845,
1526
+ "grad_norm": 2.532017230987549,
1527
+ "learning_rate": 4.4906834737193457e-05,
1528
+ "loss": 0.3286,
1529
+ "step": 10850
1530
+ },
1531
+ {
1532
+ "epoch": 0.19261013235320104,
1533
+ "grad_norm": 3.721505641937256,
1534
+ "learning_rate": 4.485774871885493e-05,
1535
+ "loss": 0.4082,
1536
+ "step": 10900
1537
+ },
1538
+ {
1539
+ "epoch": 0.19349366507041757,
1540
+ "grad_norm": 2.2368271350860596,
1541
+ "learning_rate": 4.4808662700516386e-05,
1542
+ "loss": 0.4056,
1543
+ "step": 10950
1544
+ },
1545
+ {
1546
+ "epoch": 0.19437719778763407,
1547
+ "grad_norm": 2.2011897563934326,
1548
+ "learning_rate": 4.475957668217785e-05,
1549
+ "loss": 0.4435,
1550
+ "step": 11000
1551
  }
1552
  ],
1553
  "logging_steps": 50,