guyhadad01 committed on
Commit
c233136
·
verified ·
1 Parent(s): b0a33a9

Training in progress, step 13400, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1187,6 +1187,8 @@ You can finetune this model on your own dataset.
1187
  </details>
1188
 
1189
  ### Training Logs
 
 
1190
  | Epoch | Step | Training Loss |
1191
  |:------:|:-----:|:-------------:|
1192
  | 0.1458 | 8250 | 0.4688 |
@@ -1289,7 +1291,12 @@ You can finetune this model on your own dataset.
1289
  | 0.2315 | 13100 | 0.4359 |
1290
  | 0.2324 | 13150 | 0.3702 |
1291
  | 0.2333 | 13200 | 0.5026 |
 
 
 
 
1292
 
 
1293
 
1294
  ### Framework Versions
1295
  - Python: 3.11.13
 
1187
  </details>
1188
 
1189
  ### Training Logs
1190
+ <details><summary>Click to expand</summary>
1191
+
1192
  | Epoch | Step | Training Loss |
1193
  |:------:|:-----:|:-------------:|
1194
  | 0.1458 | 8250 | 0.4688 |
 
1291
  | 0.2315 | 13100 | 0.4359 |
1292
  | 0.2324 | 13150 | 0.3702 |
1293
  | 0.2333 | 13200 | 0.5026 |
1294
+ | 0.2341 | 13250 | 0.5201 |
1295
+ | 0.2350 | 13300 | 0.3857 |
1296
+ | 0.2359 | 13350 | 0.3555 |
1297
+ | 0.2368 | 13400 | 0.381 |
1298
 
1299
+ </details>
1300
 
1301
  ### Framework Versions
1302
  - Python: 3.11.13
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0408532633e3d53818583ff9ef02c9be81d3ffdd069f1e604b4cdcf7a79bbd08
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1ff219d2dc182bff9938951ac2d922c87f8b3382fe905eda8bb33e8c98eb346
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b0884b763c5d2ec8f0b94fd605e20d3981be5785da135173f386b90a4f72ddc
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbe0f24feef012ade435d0955f499b908ca4147d4a231048c8182b66d61b6f43
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bff3c556549471e3b0bd6d9a3f96647cfb8a7aa9ac95f0e83191eb61af4e6bc0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c735c57cb6abf0ceb0d281a7f9e69dc9a61b723c825d122108ee089e4f92d40
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0e926fae13271b40ac8f9ad92492b339eb4958bd5b878164fc6bd3383e8e0f9
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:578b0f8cd0a36e27a4c0005bc9769962acef123517226f60d74acf957afcc72c
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c897b83c9872ac7b716f5b64194b036c139fb3350016b3588f5f49c22a743418
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c54be013f521e475446bbdac4929f2f53d5e8b1200009f671ab53144a213230f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.23325263734516088,
6
  "eval_steps": 500,
7
- "global_step": 13200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1856,6 +1856,34 @@
1856
  "learning_rate": 4.260077359564902e-05,
1857
  "loss": 0.5026,
1858
  "step": 13200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1859
  }
1860
  ],
1861
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.23678676821402697,
6
  "eval_steps": 500,
7
+ "global_step": 13400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1856
  "learning_rate": 4.260077359564902e-05,
1857
  "loss": 0.5026,
1858
  "step": 13200
1859
+ },
1860
+ {
1861
+ "epoch": 0.2341361700623774,
1862
+ "grad_norm": 1.818076252937317,
1863
+ "learning_rate": 4.2551687577310476e-05,
1864
+ "loss": 0.5201,
1865
+ "step": 13250
1866
+ },
1867
+ {
1868
+ "epoch": 0.23501970277959394,
1869
+ "grad_norm": 1.9688682556152344,
1870
+ "learning_rate": 4.250260155897194e-05,
1871
+ "loss": 0.3857,
1872
+ "step": 13300
1873
+ },
1874
+ {
1875
+ "epoch": 0.23590323549681044,
1876
+ "grad_norm": 2.4908297061920166,
1877
+ "learning_rate": 4.245351554063341e-05,
1878
+ "loss": 0.3555,
1879
+ "step": 13350
1880
+ },
1881
+ {
1882
+ "epoch": 0.23678676821402697,
1883
+ "grad_norm": 1.9015276432037354,
1884
+ "learning_rate": 4.240442952229487e-05,
1885
+ "loss": 0.381,
1886
+ "step": 13400
1887
  }
1888
  ],
1889
  "logging_steps": 50,