SystemAdmin123 committed on
Commit
f7f1aeb
·
verified ·
1 Parent(s): 6e61e18

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d7ea76a7fe9a3a72bfd9d91b402405835199ee3276e58a75b2b90de4df6faf3
3
  size 2066752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1ae0cef03350b13f2142034ec12a4f632f2a167671a45fb33d96be882bf8881
3
  size 2066752
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55accac051f9670b17e34e185dbc7868d482bf70ed6d173806a6b70a0e363569
3
  size 2162798
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4e7bc77588b6383613b88b55c7bb31c785abbac487d86ed9e1ea0688d925796
3
  size 2162798
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:470231bc1b9887c06a948023513b621d90c9fafd020c42706547857cecbea47a
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86bc4ba66dd9b2678296e8603b5fc45b9513c8a929966cabd3603650c1d00ca8
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f6c57457d54b7bbc2f9492e8a647b031ace734fd264dd50bf21159a960787c9
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ddf240f6d195d0e03799bc56610ed87dcf151c18d11e02b13b1a13f27c3bac
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c43ae2edd3b792abc4b0bbe5b96b0cbff30dfd7f8da70e732cd4ef7bf6a1ef4a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:628e045b78e5d559e9fcef6658ed03e911773af6f7354bf6b3b679b0ff34bfdb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 36.92307692307692,
5
  "eval_steps": 40,
6
- "global_step": 480,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -447,6 +447,20 @@
447
  "eval_samples_per_second": 284.308,
448
  "eval_steps_per_second": 4.546,
449
  "step": 480
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  }
451
  ],
452
  "logging_steps": 10,
@@ -466,7 +480,7 @@
466
  "attributes": {}
467
  }
468
  },
469
- "total_flos": 196396103761920.0,
470
  "train_batch_size": 32,
471
  "trial_name": null,
472
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 38.46153846153846,
5
  "eval_steps": 40,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
447
  "eval_samples_per_second": 284.308,
448
  "eval_steps_per_second": 4.546,
449
  "step": 480
450
+ },
451
+ {
452
+ "epoch": 37.69230769230769,
453
+ "grad_norm": 0.4140625,
454
+ "learning_rate": 1.7822218477475494e-05,
455
+ "loss": 9.1778,
456
+ "step": 490
457
+ },
458
+ {
459
+ "epoch": 38.46153846153846,
460
+ "grad_norm": 0.419921875,
461
+ "learning_rate": 1.4808059116167305e-05,
462
+ "loss": 9.1781,
463
+ "step": 500
464
  }
465
  ],
466
  "logging_steps": 10,
 
480
  "attributes": {}
481
  }
482
  },
483
+ "total_flos": 204579274752000.0,
484
  "train_batch_size": 32,
485
  "trial_name": null,
486
  "trial_params": null