mgh6 committed on
Commit
860fcec
·
verified ·
1 Parent(s): f73fff1

Training in progress, step 4600, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc8084e5d2f40c117fb67d150045799cf2c3cae6229373318b8bd37a0a605209
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4fdcfcdbc428ac62e3be4844df2a7454e840e8969a5d0c963a4d867d92764b4
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4575ef57c4ebf8ade03a543a393b32ae16e5578f529ef3653494f07df3f0f2d
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368b55a584bc772d0b6375dbcd91c7f46be779bdc0ef7190ada74b670b7c8a1b
3
  size 268176506
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f21d552e0b8edeac4b814e68abd63ad5a2fa0f0f0b7c2bf9dd3d572dde4cd39
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a11bb7b12b9642bd5d82055be71dcb8d8482abac72121424fe481d88f5de75b
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b4f9a397f8fb58627464d50119035e3cc721b6c9f0cc18f4836a495d9182e5f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1818895e958d1cb907d3d67eeeaacce674781a97f1e95a9febb4e6ac8d60460f
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:736c0a3c5e9cad8a63099122bc81fcfe6c1a1da1f52783fd5ab9a0a79f0525e7
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51a4fe1ceeb1cb8cee5e1e7b373586181b6fea32cd158c5e5eaeeed87798c2ea
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:182d0fd1f95f38608a11f6b6579d63e24538540d9ce9b05d123e02020b4a25cf
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ace3b97e4bece8ab00e978bc4f2ddd7d0408f2682a1d707793492ed9cc9aa3
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6a126727af8bb09352afcee26a5ce13ba782b7b555b31ebe9b3deaff7994ff9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5dc6da207765f4ba830c89b2a2c26098140ff38d2eed9174783bad0060f41de
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.78049236536026,
3
- "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-4200",
4
- "epoch": 6.024096385542169,
5
  "eval_steps": 100,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -682,6 +682,21 @@
682
  "eval_samples_per_second": 894.234,
683
  "eval_steps_per_second": 3.613,
684
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
685
  }
686
  ],
687
  "logging_steps": 100,
@@ -696,7 +711,7 @@
696
  "early_stopping_threshold": 0.0
697
  },
698
  "attributes": {
699
- "early_stopping_patience_counter": 3
700
  }
701
  },
702
  "TrainerControl": {
@@ -710,7 +725,7 @@
710
  "attributes": {}
711
  }
712
  },
713
- "total_flos": 1.6340521844736e+17,
714
  "train_batch_size": 64,
715
  "trial_name": null,
716
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7793735861778259,
3
+ "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-4600",
4
+ "epoch": 6.157965194109773,
5
  "eval_steps": 100,
6
+ "global_step": 4600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
682
  "eval_samples_per_second": 894.234,
683
  "eval_steps_per_second": 3.613,
684
  "step": 4500
685
+ },
686
+ {
687
+ "epoch": 6.157965194109773,
688
+ "grad_norm": 0.18886443972587585,
689
+ "learning_rate": 0.0003842034805890228,
690
+ "loss": 0.7246,
691
+ "step": 4600
692
+ },
693
+ {
694
+ "epoch": 6.157965194109773,
695
+ "eval_loss": 0.7793735861778259,
696
+ "eval_runtime": 6.3661,
697
+ "eval_samples_per_second": 894.112,
698
+ "eval_steps_per_second": 3.613,
699
+ "step": 4600
700
  }
701
  ],
702
  "logging_steps": 100,
 
711
  "early_stopping_threshold": 0.0
712
  },
713
  "attributes": {
714
+ "early_stopping_patience_counter": 0
715
  }
716
  },
717
  "TrainerControl": {
 
725
  "attributes": {}
726
  }
727
  },
728
+ "total_flos": 1.67036445523968e+17,
729
  "train_batch_size": 64,
730
  "trial_name": null,
731
  "trial_params": null