guyhadad01 commited on
Commit
1f4450e
·
verified ·
1 Parent(s): 9e43eec

Training in progress, step 23600, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1491,6 +1491,14 @@ You can finetune this model on your own dataset.
1491
  | 0.4082 | 23100 | 0.3474 |
1492
  | 0.4091 | 23150 | 0.3208 |
1493
  | 0.4100 | 23200 | 0.3798 |
 
 
 
 
 
 
 
 
1494
 
1495
  </details>
1496
 
 
1491
  | 0.4082 | 23100 | 0.3474 |
1492
  | 0.4091 | 23150 | 0.3208 |
1493
  | 0.4100 | 23200 | 0.3798 |
1494
+ | 0.4108 | 23250 | 0.3282 |
1495
+ | 0.4117 | 23300 | 0.3302 |
1496
+ | 0.4126 | 23350 | 0.3599 |
1497
+ | 0.4135 | 23400 | 0.3608 |
1498
+ | 0.4144 | 23450 | 0.3387 |
1499
+ | 0.4153 | 23500 | 0.3987 |
1500
+ | 0.4161 | 23550 | 0.3387 |
1501
+ | 0.4170 | 23600 | 0.2989 |
1502
 
1503
  </details>
1504
 
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cfe587849d1306f54e0ca75ee4b8dc42ffa4c0050923c00408ab072955907d3
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1617eb2ae4888507c4f4075423705e736487e0fd06011313c271b8a67d2121e7
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c016c7f7476d35ba0914e4807cd567e2323f8abd2649d40533bb1edf8afea2d
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8082e636e89c0305931d4fed9e511d53d0c861249cb9eb1baa51ec94b573d123
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7301644e101f87025474e0abd0c4e21251cc4c43a5173ce57ba0318fade3400
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7c3077f3b0e21db426cf04aaf6706b3f8e724b43a1c804482891604f1539c3f
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8bb4812f9196d1a220df2036c293c5cb5d81dc224d96c15decb25fee077dd8a
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2a1548d715b309492a66002f720121ae6b58979a558a4ea26d5d559620bd59b
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55ce77f59b929ccf856f258ba2d8bdee259c33a00d44ab9b7d2ff7d9ff4f481c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c549b0e10abd21bebaa5ec4fd4b6a6e95036a423d8901ec4f127ce499a3bb98
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4099591807884646,
6
  "eval_steps": 500,
7
- "global_step": 23200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3256,6 +3256,62 @@
3256
  "learning_rate": 3.278847852977558e-05,
3257
  "loss": 0.3798,
3258
  "step": 23200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3259
  }
3260
  ],
3261
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4170274425261967,
6
  "eval_steps": 500,
7
+ "global_step": 23600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3256
  "learning_rate": 3.278847852977558e-05,
3257
  "loss": 0.3798,
3258
  "step": 23200
3259
+ },
3260
+ {
3261
+ "epoch": 0.41084271350568113,
3262
+ "grad_norm": 1.8930203914642334,
3263
+ "learning_rate": 3.273939251143704e-05,
3264
+ "loss": 0.3282,
3265
+ "step": 23250
3266
+ },
3267
+ {
3268
+ "epoch": 0.4117262462228976,
3269
+ "grad_norm": 1.256135106086731,
3270
+ "learning_rate": 3.2690306493098507e-05,
3271
+ "loss": 0.3302,
3272
+ "step": 23300
3273
+ },
3274
+ {
3275
+ "epoch": 0.41260977894011414,
3276
+ "grad_norm": 1.952988862991333,
3277
+ "learning_rate": 3.264122047475997e-05,
3278
+ "loss": 0.3599,
3279
+ "step": 23350
3280
+ },
3281
+ {
3282
+ "epoch": 0.41349331165733066,
3283
+ "grad_norm": 1.3686082363128662,
3284
+ "learning_rate": 3.2592134456421436e-05,
3285
+ "loss": 0.3608,
3286
+ "step": 23400
3287
+ },
3288
+ {
3289
+ "epoch": 0.4143768443745472,
3290
+ "grad_norm": 1.56107759475708,
3291
+ "learning_rate": 3.2543048438082894e-05,
3292
+ "loss": 0.3387,
3293
+ "step": 23450
3294
+ },
3295
+ {
3296
+ "epoch": 0.4152603770917637,
3297
+ "grad_norm": 1.823240876197815,
3298
+ "learning_rate": 3.249396241974436e-05,
3299
+ "loss": 0.3987,
3300
+ "step": 23500
3301
+ },
3302
+ {
3303
+ "epoch": 0.41614390980898025,
3304
+ "grad_norm": 1.2912514209747314,
3305
+ "learning_rate": 3.244487640140583e-05,
3306
+ "loss": 0.3387,
3307
+ "step": 23550
3308
+ },
3309
+ {
3310
+ "epoch": 0.4170274425261967,
3311
+ "grad_norm": 1.5520604848861694,
3312
+ "learning_rate": 3.239579038306729e-05,
3313
+ "loss": 0.2989,
3314
+ "step": 23600
3315
  }
3316
  ],
3317
  "logging_steps": 50,