guyhadad01 committed on
Commit
b9f4678
·
verified ·
1 Parent(s): df64f5a

Training in progress, step 10200, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1225,6 +1225,10 @@ You can finetune this model on your own dataset.
1225
  | 0.1749 | 9900 | 0.4193 |
1226
  | 0.1758 | 9950 | 0.3173 |
1227
  | 0.1767 | 10000 | 0.4569 |
 
 
 
 
1228
 
1229
 
1230
  ### Framework Versions
 
1225
  | 0.1749 | 9900 | 0.4193 |
1226
  | 0.1758 | 9950 | 0.3173 |
1227
  | 0.1767 | 10000 | 0.4569 |
1228
+ | 0.1776 | 10050 | 0.4538 |
1229
+ | 0.1785 | 10100 | 0.4422 |
1230
+ | 0.1794 | 10150 | 0.3747 |
1231
+ | 0.1802 | 10200 | 0.3989 |
1232
 
1233
 
1234
  ### Framework Versions
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59544be7f17f690843a7b80096f64b1e3d29b5b39717cdc35f18b358c6aa330b
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b1113f1dc63b4ef74f5c024aa4257a74f5c601162a5392123b472bd440c772d
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7786e202caa6e3b4a367e42ca66bc274cb81d33f0bd440e8ec441d4bc4a6bbfb
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:372cd21ba6640c7fb2d1ef5b71e6d5270fa8bff460e3c646226db5c68492b951
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6395e61165750a60c8ef153cec1804a8ff2db337ecba35b230075ccb363443da
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11b6cb074bbe2129c4a92512f6b7604d9e93435cd6ffac4a406363aba2e66f67
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5f63680070bf507d18d4c264f788fe918fbe3f7f9774c4e8c816737dc3b4be4
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8373615cf09b792af1b9fd441a341b87607a411c0918e8ae083ffab9de6dcc10
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84fcbd3212b661f15243c53e45a5f75752a67ffa024a0416b41d6014d2a1bc3d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58c8231fde4f806f412947f64ac786f123198ace358add629069cb5de99c1e42
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.1767065434433037,
6
  "eval_steps": 500,
7
- "global_step": 10000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1408,6 +1408,34 @@
1408
  "learning_rate": 4.574031532858181e-05,
1409
  "loss": 0.4569,
1410
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1411
  }
1412
  ],
1413
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.18024067431216978,
6
  "eval_steps": 500,
7
+ "global_step": 10200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1408
  "learning_rate": 4.574031532858181e-05,
1409
  "loss": 0.4569,
1410
  "step": 10000
1411
+ },
1412
+ {
1413
+ "epoch": 0.17759007616052022,
1414
+ "grad_norm": 1.6226812601089478,
1415
+ "learning_rate": 4.569122931024327e-05,
1416
+ "loss": 0.4538,
1417
+ "step": 10050
1418
+ },
1419
+ {
1420
+ "epoch": 0.17847360887773675,
1421
+ "grad_norm": 1.9845385551452637,
1422
+ "learning_rate": 4.564214329190474e-05,
1423
+ "loss": 0.4422,
1424
+ "step": 10100
1425
+ },
1426
+ {
1427
+ "epoch": 0.17935714159495325,
1428
+ "grad_norm": 1.7016047239303589,
1429
+ "learning_rate": 4.5593057273566195e-05,
1430
+ "loss": 0.3747,
1431
+ "step": 10150
1432
+ },
1433
+ {
1434
+ "epoch": 0.18024067431216978,
1435
+ "grad_norm": 2.2167670726776123,
1436
+ "learning_rate": 4.5543971255227666e-05,
1437
+ "loss": 0.3989,
1438
+ "step": 10200
1439
  }
1440
  ],
1441
  "logging_steps": 50,