guyhadad01 commited on
Commit
c17bb58
·
verified ·
1 Parent(s): 63a97ad

Training in progress, step 18400, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1391,6 +1391,10 @@ You can finetune this model on your own dataset.
1391
  | 0.3198 | 18100 | 0.3279 |
1392
  | 0.3207 | 18150 | 0.3062 |
1393
  | 0.3216 | 18200 | 0.2973 |
 
 
 
 
1394
 
1395
  </details>
1396
 
 
1391
  | 0.3198 | 18100 | 0.3279 |
1392
  | 0.3207 | 18150 | 0.3062 |
1393
  | 0.3216 | 18200 | 0.2973 |
1394
+ | 0.3225 | 18250 | 0.4078 |
1395
+ | 0.3234 | 18300 | 0.31 |
1396
+ | 0.3243 | 18350 | 0.306 |
1397
+ | 0.3251 | 18400 | 0.3426 |
1398
 
1399
  </details>
1400
 
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c561d22f3f5062bc767250ffdb3fa4a0f7bd3dbdb65e4c11cfceaa01995c64c
3
  size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90f49bf378de383189a093b0a2bf919799fdcc55993cf62d4aee6a7a981b2f6f
3
  size 90864192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979b402a40bd2a435e70fee699ca07b55766750d192cbf2268122098c8ea3e92
3
  size 180609210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e4ba3df85c6e099d0bc980346fc8364af443b293c36414cd30caab13dab2b5
3
  size 180609210
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28d8ccebbb7f7b52e625ef2554e1cd3690dd81aea7ece9b35eeb250cf32f7566
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb07bb76d154c255f7a6a65b33edbee34264ab63583d144665a60ece83d7919
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1747e186a8b719e713dac067421a4083615d1c151147e0b1a41977c8731e3e98
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:356d49be14a74ca1fba66e6f1f5cf686b54ed1e4a1626358e76328c151c43051
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:896e9fd82393923d4baed79b5528ea09727379e27a1318af2376e0aaf0f43d15
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15d8f9a4e8d57a5144e44194422a5d35ad70c28c3e656d9069d30d3e3c869476
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.32160590906681275,
6
  "eval_steps": 500,
7
- "global_step": 18200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2556,6 +2556,34 @@
2556
  "learning_rate": 3.7694135202528916e-05,
2557
  "loss": 0.2973,
2558
  "step": 18200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2559
  }
2560
  ],
2561
  "logging_steps": 50,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.3251400399356788,
6
  "eval_steps": 500,
7
+ "global_step": 18400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2556
  "learning_rate": 3.7694135202528916e-05,
2557
  "loss": 0.2973,
2558
  "step": 18200
2559
+ },
2560
+ {
2561
+ "epoch": 0.3224894417840293,
2562
+ "grad_norm": 2.819748640060425,
2563
+ "learning_rate": 3.764504918419038e-05,
2564
+ "loss": 0.4078,
2565
+ "step": 18250
2566
+ },
2567
+ {
2568
+ "epoch": 0.3233729745012458,
2569
+ "grad_norm": 1.5743447542190552,
2570
+ "learning_rate": 3.759596316585184e-05,
2571
+ "loss": 0.31,
2572
+ "step": 18300
2573
+ },
2574
+ {
2575
+ "epoch": 0.3242565072184623,
2576
+ "grad_norm": 1.8966853618621826,
2577
+ "learning_rate": 3.7546877147513303e-05,
2578
+ "loss": 0.306,
2579
+ "step": 18350
2580
+ },
2581
+ {
2582
+ "epoch": 0.3251400399356788,
2583
+ "grad_norm": 2.7652056217193604,
2584
+ "learning_rate": 3.749779112917477e-05,
2585
+ "loss": 0.3426,
2586
+ "step": 18400
2587
  }
2588
  ],
2589
  "logging_steps": 50,