EmreAkgul commited on
Commit
99ae96f
·
verified ·
1 Parent(s): 80170b1

Training in progress, epoch 1

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. model.safetensors +1 -1
  2. run-0/checkpoint-312/model.safetensors +1 -1
  3. run-0/checkpoint-312/optimizer.pt +1 -1
  4. run-0/checkpoint-312/rng_state.pth +1 -1
  5. run-0/checkpoint-312/scheduler.pt +1 -1
  6. run-0/checkpoint-312/trainer_state.json +15 -25
  7. run-0/checkpoint-312/training_args.bin +1 -1
  8. run-0/checkpoint-624/model.safetensors +1 -1
  9. run-0/checkpoint-624/optimizer.pt +1 -1
  10. run-0/checkpoint-624/rng_state.pth +1 -1
  11. run-0/checkpoint-624/scheduler.pt +1 -1
  12. run-0/checkpoint-624/trainer_state.json +29 -48
  13. run-0/checkpoint-624/training_args.bin +1 -1
  14. run-0/checkpoint-936/model.safetensors +1 -1
  15. run-0/checkpoint-936/optimizer.pt +1 -1
  16. run-0/checkpoint-936/scheduler.pt +1 -1
  17. run-0/checkpoint-936/trainer_state.json +30 -31
  18. run-0/checkpoint-936/training_args.bin +1 -1
  19. run-1/checkpoint-156/model.safetensors +1 -1
  20. run-1/checkpoint-156/optimizer.pt +1 -1
  21. run-1/checkpoint-156/rng_state.pth +1 -1
  22. run-1/checkpoint-156/scheduler.pt +1 -1
  23. run-1/checkpoint-156/trainer_state.json +24 -16
  24. run-1/checkpoint-156/training_args.bin +1 -1
  25. run-1/checkpoint-312/trainer_state.json +39 -22
  26. run-1/checkpoint-468/trainer_state.json +55 -29
  27. run-1/checkpoint-78/model.safetensors +1 -1
  28. run-1/checkpoint-78/optimizer.pt +1 -1
  29. run-1/checkpoint-78/rng_state.pth +1 -1
  30. run-1/checkpoint-78/scheduler.pt +1 -1
  31. run-1/checkpoint-78/trainer_state.json +18 -25
  32. run-1/checkpoint-78/training_args.bin +1 -1
  33. run-14/checkpoint-78/config.json +24 -0
  34. run-14/checkpoint-78/model.safetensors +3 -0
  35. run-14/checkpoint-78/optimizer.pt +3 -0
  36. run-14/checkpoint-78/rng_state.pth +3 -0
  37. run-14/checkpoint-78/scheduler.pt +3 -0
  38. run-14/checkpoint-78/special_tokens_map.json +7 -0
  39. run-14/checkpoint-78/tokenizer.json +0 -0
  40. run-14/checkpoint-78/tokenizer_config.json +56 -0
  41. run-14/checkpoint-78/trainer_state.json +59 -0
  42. run-14/checkpoint-78/training_args.bin +3 -0
  43. run-14/checkpoint-78/vocab.txt +0 -0
  44. run-15/checkpoint-117/config.json +24 -0
  45. run-15/checkpoint-117/model.safetensors +3 -0
  46. run-15/checkpoint-39/config.json +24 -0
  47. run-15/checkpoint-39/model.safetensors +3 -0
  48. run-15/checkpoint-39/optimizer.pt +3 -0
  49. run-15/checkpoint-39/rng_state.pth +3 -0
  50. run-15/checkpoint-39/scheduler.pt +3 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:241ef8da471fb80623dcdb3856d66bf30562de8db05eb5a4d49671d305f4f620
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c3867cd6186c0f0fdda1b2286dbb5072fbb55d0c7c1781ce71197da52eaa00
3
  size 267832560
run-0/checkpoint-312/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a031297c1dc8843b31163f23de3749bda5f9d01fe5a1b6258d993fda8d0c2d99
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af33864575a010cdd1bf7b28c93d761c4018380479fc38d0a247071d4b8e870f
3
  size 267832560
run-0/checkpoint-312/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f016ca179d63f128dc92d87c1e84b990732ff5c1cc5a57718d06db18a51c002
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05713f040f19f66a8b5974c3e8efad7c98a94e42cad9ebc3ae3ff273e3d9a545
3
  size 535727290
run-0/checkpoint-312/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c636584a35981a860659e9c392382609636f3e7f8bb3e4c194cd479b3b5ce959
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:464294b8f8d20e1c4ddec59fa526f01ab0118297dfdf6d7f16bfc2ad4e7c10ef
3
  size 14244
run-0/checkpoint-312/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:582d7d3c47473a240a197819a84edd7bd0c7b6a0e536041de49db2ca04f17cd5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:092bc1a854310f6eb6f681c4f33e75605c297e62a5634fbc23fcff624e36a1ff
3
  size 1064
run-0/checkpoint-312/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_global_step": 312,
3
- "best_metric": 0.6064981949458483,
4
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-312",
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
  "global_step": 312,
8
  "is_hyper_param_search": true,
@@ -11,27 +11,18 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.48014440433212996,
15
- "eval_loss": 0.697701096534729,
16
- "eval_runtime": 1.8239,
17
- "eval_samples_per_second": 151.874,
18
- "eval_steps_per_second": 2.741,
19
- "step": 156
20
- },
21
- {
22
- "epoch": 2.0,
23
- "eval_accuracy": 0.6064981949458483,
24
- "eval_loss": 0.6758008599281311,
25
- "eval_runtime": 1.7015,
26
- "eval_samples_per_second": 162.797,
27
- "eval_steps_per_second": 2.939,
28
  "step": 312
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 624,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 4,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
@@ -46,14 +37,13 @@
46
  }
47
  },
48
  "total_flos": 0,
49
- "train_batch_size": 16,
50
  "trial_name": null,
51
  "trial_params": {
52
- "dropout_rate": 0.3011303001917175,
53
- "learning_rate": 2.946052203220992e-05,
54
- "num_train_epochs": 4,
55
- "per_device_train_batch_size": 16,
56
- "warmup_steps": 31,
57
- "weight_decay": 0.08563849483087554
58
  }
59
  }
 
1
  {
2
  "best_global_step": 312,
3
+ "best_metric": 0.4657039711191336,
4
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-312",
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
  "global_step": 312,
8
  "is_hyper_param_search": true,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.4657039711191336,
15
+ "eval_loss": 0.6989437341690063,
16
+ "eval_runtime": 1.7076,
17
+ "eval_samples_per_second": 162.213,
18
+ "eval_steps_per_second": 2.928,
 
 
 
 
 
 
 
 
 
19
  "step": 312
20
  }
21
  ],
22
  "logging_steps": 500,
23
+ "max_steps": 936,
24
  "num_input_tokens_seen": 0,
25
+ "num_train_epochs": 3,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "TrainerControl": {
 
37
  }
38
  },
39
  "total_flos": 0,
40
+ "train_batch_size": 8,
41
  "trial_name": null,
42
  "trial_params": {
43
+ "dropout_rate": 0.4164977664597427,
44
+ "learning_rate": 4.970849395732763e-05,
45
+ "max_length": 128,
46
+ "num_train_epochs": 3,
47
+ "per_device_train_batch_size": 8
 
48
  }
49
  }
run-0/checkpoint-312/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93434644324cfac8db4668812aba61eebff369e3a69662bad12ec95e4ac4e62b
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae215d94645660c249574c56f55fb76e2b43588ee2ee4a9b6a5129bf4169f594
3
  size 5432
run-0/checkpoint-624/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16d22ba48b8134e790f95240578748be30681cddc71d13eb059685daa7d35b65
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0cbfdbe4df5c0442239884b7fccaeb69336b685ac4bd48383868a2e2df8a18
3
  size 267832560
run-0/checkpoint-624/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:226ab04ce95224545fe84cc1ec0f3eb709d183548038581ea73e4936f85730c4
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b7727af781df4329eaa833381479243bdc243bf60c8847aecd36c17ed8b888
3
  size 535727290
run-0/checkpoint-624/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5ca439a6f762b0d07da29657e36660ea25e1e816d9088ca8e9cfe99d642a720
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ded0337946703ddd48342fbb3bd34798462c4463e30ff7ada3890c4901afa1
3
  size 14244
run-0/checkpoint-624/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:509a94f1a30314dc509063c0ea6790d33341068fa0d1b44b4a26ba4e64606abc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:732129b8ff652ae97bb18868f4953153172787dad0894d12dd8bdf98f1c55547
3
  size 1064
run-0/checkpoint-624/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_global_step": 468,
3
- "best_metric": 0.6245487364620939,
4
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-468",
5
- "epoch": 4.0,
6
  "eval_steps": 500,
7
  "global_step": 624,
8
  "is_hyper_param_search": true,
@@ -11,52 +11,34 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.48014440433212996,
15
- "eval_loss": 0.697701096534729,
16
- "eval_runtime": 1.8239,
17
- "eval_samples_per_second": 151.874,
18
- "eval_steps_per_second": 2.741,
19
- "step": 156
20
- },
21
- {
22
- "epoch": 2.0,
23
- "eval_accuracy": 0.6064981949458483,
24
- "eval_loss": 0.6758008599281311,
25
- "eval_runtime": 1.7015,
26
- "eval_samples_per_second": 162.797,
27
- "eval_steps_per_second": 2.939,
28
  "step": 312
29
  },
30
  {
31
- "epoch": 3.0,
32
- "eval_accuracy": 0.6245487364620939,
33
- "eval_loss": 0.7540601491928101,
34
- "eval_runtime": 1.8182,
35
- "eval_samples_per_second": 152.351,
36
- "eval_steps_per_second": 2.75,
37
- "step": 468
38
- },
39
- {
40
- "epoch": 3.2051282051282053,
41
- "grad_norm": 11.614490509033203,
42
- "learning_rate": 6.1603789746948225e-06,
43
- "loss": 0.5768,
44
  "step": 500
45
  },
46
  {
47
- "epoch": 4.0,
48
- "eval_accuracy": 0.6173285198555957,
49
- "eval_loss": 0.8625991940498352,
50
- "eval_runtime": 1.7067,
51
- "eval_samples_per_second": 162.302,
52
- "eval_steps_per_second": 2.93,
53
  "step": 624
54
  }
55
  ],
56
  "logging_steps": 500,
57
- "max_steps": 624,
58
  "num_input_tokens_seen": 0,
59
- "num_train_epochs": 4,
60
  "save_steps": 500,
61
  "stateful_callbacks": {
62
  "TrainerControl": {
@@ -65,20 +47,19 @@
65
  "should_evaluate": false,
66
  "should_log": false,
67
  "should_save": true,
68
- "should_training_stop": true
69
  },
70
  "attributes": {}
71
  }
72
  },
73
- "total_flos": 376629134565480.0,
74
- "train_batch_size": 16,
75
  "trial_name": null,
76
  "trial_params": {
77
- "dropout_rate": 0.3011303001917175,
78
- "learning_rate": 2.946052203220992e-05,
79
- "num_train_epochs": 4,
80
- "per_device_train_batch_size": 16,
81
- "warmup_steps": 31,
82
- "weight_decay": 0.08563849483087554
83
  }
84
  }
 
1
  {
2
+ "best_global_step": 624,
3
+ "best_metric": 0.5415162454873647,
4
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-624",
5
+ "epoch": 2.0,
6
  "eval_steps": 500,
7
  "global_step": 624,
8
  "is_hyper_param_search": true,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.4657039711191336,
15
+ "eval_loss": 0.6989437341690063,
16
+ "eval_runtime": 1.7076,
17
+ "eval_samples_per_second": 162.213,
18
+ "eval_steps_per_second": 2.928,
 
 
 
 
 
 
 
 
 
19
  "step": 312
20
  },
21
  {
22
+ "epoch": 1.6025641025641026,
23
+ "grad_norm": 2.613506555557251,
24
+ "learning_rate": 2.5924525556692398e-05,
25
+ "loss": 0.6844,
 
 
 
 
 
 
 
 
 
26
  "step": 500
27
  },
28
  {
29
+ "epoch": 2.0,
30
+ "eval_accuracy": 0.5415162454873647,
31
+ "eval_loss": 0.7258058190345764,
32
+ "eval_runtime": 1.8248,
33
+ "eval_samples_per_second": 151.793,
34
+ "eval_steps_per_second": 2.74,
35
  "step": 624
36
  }
37
  ],
38
  "logging_steps": 500,
39
+ "max_steps": 936,
40
  "num_input_tokens_seen": 0,
41
+ "num_train_epochs": 3,
42
  "save_steps": 500,
43
  "stateful_callbacks": {
44
  "TrainerControl": {
 
47
  "should_evaluate": false,
48
  "should_log": false,
49
  "should_save": true,
50
+ "should_training_stop": false
51
  },
52
  "attributes": {}
53
  }
54
  },
55
+ "total_flos": 2457345275431320.0,
56
+ "train_batch_size": 8,
57
  "trial_name": null,
58
  "trial_params": {
59
+ "dropout_rate": 0.4164977664597427,
60
+ "learning_rate": 4.970849395732763e-05,
61
+ "max_length": 128,
62
+ "num_train_epochs": 3,
63
+ "per_device_train_batch_size": 8
 
64
  }
65
  }
run-0/checkpoint-624/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93434644324cfac8db4668812aba61eebff369e3a69662bad12ec95e4ac4e62b
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae215d94645660c249574c56f55fb76e2b43588ee2ee4a9b6a5129bf4169f594
3
  size 5432
run-0/checkpoint-936/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:526a605fe2da7b2ad734d4ee7394237a876bd3e73c82dbfa97fae876d00cf792
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f8f3e2273546bac6160e79ccd41b9434a5cc5cb61c5e9904f7bf6fb95c082d2
3
  size 267832560
run-0/checkpoint-936/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24c313ba9e4820ca87e9021fd5e8f09d57c03055ee426062d7707c75e24c6ae0
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63cb4b91ac772fbba08ff7ae8ee7bc0ec70d5f70060e504203d37b33fb9d8de2
3
  size 535727290
run-0/checkpoint-936/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cc2e8dd33a72a46e342f84a895ce3b3b031318c5f55f97f294bb4de1e7121cc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a447a8c0282d073f8870604335911a6d882470042208ce485c5335cda624ed58
3
  size 1064
run-0/checkpoint-936/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 624,
3
- "best_metric": 0.6462093862815884,
4
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-624",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 936,
@@ -11,43 +11,43 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5090252707581228,
15
- "eval_loss": 0.6910260915756226,
16
- "eval_runtime": 1.5124,
17
- "eval_samples_per_second": 183.157,
18
- "eval_steps_per_second": 3.306,
19
  "step": 312
20
  },
21
  {
22
  "epoch": 1.6025641025641026,
23
- "grad_norm": 3.160397529602051,
24
- "learning_rate": 1.4005781344298294e-05,
25
- "loss": 0.6824,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.6462093862815884,
31
- "eval_loss": 0.6472051739692688,
32
- "eval_runtime": 1.485,
33
- "eval_samples_per_second": 186.536,
34
- "eval_steps_per_second": 3.367,
35
  "step": 624
36
  },
37
  {
38
  "epoch": 3.0,
39
- "eval_accuracy": 0.6389891696750902,
40
- "eval_loss": 0.7419655919075012,
41
- "eval_runtime": 1.4972,
42
- "eval_samples_per_second": 185.016,
43
- "eval_steps_per_second": 3.34,
44
  "step": 936
45
  }
46
  ],
47
  "logging_steps": 500,
48
- "max_steps": 1248,
49
  "num_input_tokens_seen": 0,
50
- "num_train_epochs": 4,
51
  "save_steps": 500,
52
  "stateful_callbacks": {
53
  "TrainerControl": {
@@ -56,20 +56,19 @@
56
  "should_evaluate": false,
57
  "should_log": false,
58
  "should_save": true,
59
- "should_training_stop": false
60
  },
61
  "attributes": {}
62
  }
63
  },
64
- "total_flos": 540033880216968.0,
65
  "train_batch_size": 8,
66
  "trial_name": null,
67
  "trial_params": {
68
- "adam_epsilon": 5.546915994232822e-07,
69
- "learning_rate": 2.164529844118827e-05,
70
- "num_train_epochs": 4,
71
- "per_device_train_batch_size": 8,
72
- "warmup_steps": 92,
73
- "weight_decay": 0.08085514196621982
74
  }
75
  }
 
1
  {
2
+ "best_global_step": 936,
3
+ "best_metric": 0.5595667870036101,
4
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-936",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 936,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.4657039711191336,
15
+ "eval_loss": 0.6989437341690063,
16
+ "eval_runtime": 1.7076,
17
+ "eval_samples_per_second": 162.213,
18
+ "eval_steps_per_second": 2.928,
19
  "step": 312
20
  },
21
  {
22
  "epoch": 1.6025641025641026,
23
+ "grad_norm": 2.613506555557251,
24
+ "learning_rate": 2.5924525556692398e-05,
25
+ "loss": 0.6844,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_accuracy": 0.5415162454873647,
31
+ "eval_loss": 0.7258058190345764,
32
+ "eval_runtime": 1.8248,
33
+ "eval_samples_per_second": 151.793,
34
+ "eval_steps_per_second": 2.74,
35
  "step": 624
36
  },
37
  {
38
  "epoch": 3.0,
39
+ "eval_accuracy": 0.5595667870036101,
40
+ "eval_loss": 1.0762841701507568,
41
+ "eval_runtime": 1.7848,
42
+ "eval_samples_per_second": 155.197,
43
+ "eval_steps_per_second": 2.801,
44
  "step": 936
45
  }
46
  ],
47
  "logging_steps": 500,
48
+ "max_steps": 936,
49
  "num_input_tokens_seen": 0,
50
+ "num_train_epochs": 3,
51
  "save_steps": 500,
52
  "stateful_callbacks": {
53
  "TrainerControl": {
 
56
  "should_evaluate": false,
57
  "should_log": false,
58
  "should_save": true,
59
+ "should_training_stop": true
60
  },
61
  "attributes": {}
62
  }
63
  },
64
+ "total_flos": 2457345275431320.0,
65
  "train_batch_size": 8,
66
  "trial_name": null,
67
  "trial_params": {
68
+ "dropout_rate": 0.4164977664597427,
69
+ "learning_rate": 4.970849395732763e-05,
70
+ "max_length": 128,
71
+ "num_train_epochs": 3,
72
+ "per_device_train_batch_size": 8
 
73
  }
74
  }
run-0/checkpoint-936/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:526f4e75b1078dde13fd1d7bfa0e0694a6c559e22cc26dfd75dd18df37f301c9
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae215d94645660c249574c56f55fb76e2b43588ee2ee4a9b6a5129bf4169f594
3
  size 5432
run-1/checkpoint-156/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dec9d489f4f05da8b1d480f8079c39b7047e50e7277384b61aa2cf3fe156e329
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68702cd791f78aa3d10c4b935448583f849b856f89f33cef64354903ed420c83
3
  size 267832560
run-1/checkpoint-156/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9804e9390a2e23fc3aa462a1a625ab372e053d2d09a8eda8ebdc5e76821a82f
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9866a8fbd0a3705928bbc94824a7717215313b36162edc78e0fb9f3dd1c5ec36
3
  size 535727290
run-1/checkpoint-156/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11e40e8c9d9b9b8131cef13eddb6f2ff0afb61d421f8b11d654da2d6c72c34ac
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfb9a0ae2285bf813c7659c509c0aad02f3b9ebf80caa88b2e061183e7605ba5
3
  size 14244
run-1/checkpoint-156/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc811ac5797046cb43ddf681f6eb73d8669b5e92648272ac91a97b2551d60959
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ccbd3dce6fd4a5ce81ed931b07c38cd4d9cab7da262d5994f34f6c5acfae67
3
  size 1064
run-1/checkpoint-156/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_global_step": 156,
3
- "best_metric": 0.49458483754512633,
4
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-156",
5
- "epoch": 1.0,
6
  "eval_steps": 500,
7
  "global_step": 156,
8
  "is_hyper_param_search": true,
@@ -11,18 +11,27 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.49458483754512633,
15
- "eval_loss": 0.6931370496749878,
16
- "eval_runtime": 1.6099,
17
- "eval_samples_per_second": 172.065,
18
- "eval_steps_per_second": 3.106,
 
 
 
 
 
 
 
 
 
19
  "step": 156
20
  }
21
  ],
22
  "logging_steps": 500,
23
- "max_steps": 780,
24
  "num_input_tokens_seen": 0,
25
- "num_train_epochs": 5,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "TrainerControl": {
@@ -37,14 +46,13 @@
37
  }
38
  },
39
  "total_flos": 0,
40
- "train_batch_size": 16,
41
  "trial_name": null,
42
  "trial_params": {
43
- "dropout_rate": 0.34335779036160474,
44
- "learning_rate": 3.2907757924840336e-05,
45
- "num_train_epochs": 5,
46
- "per_device_train_batch_size": 16,
47
- "warmup_steps": 7,
48
- "weight_decay": 0.011718685414344022
49
  }
50
  }
 
1
  {
2
  "best_global_step": 156,
3
+ "best_metric": 0.5415162454873647,
4
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-156",
5
+ "epoch": 2.0,
6
  "eval_steps": 500,
7
  "global_step": 156,
8
  "is_hyper_param_search": true,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.5126353790613718,
15
+ "eval_loss": 0.6928116083145142,
16
+ "eval_runtime": 1.6157,
17
+ "eval_samples_per_second": 171.444,
18
+ "eval_steps_per_second": 3.095,
19
+ "step": 78
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_accuracy": 0.5415162454873647,
24
+ "eval_loss": 0.6922104954719543,
25
+ "eval_runtime": 1.6727,
26
+ "eval_samples_per_second": 165.597,
27
+ "eval_steps_per_second": 2.989,
28
  "step": 156
29
  }
30
  ],
31
  "logging_steps": 500,
32
+ "max_steps": 468,
33
  "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 6,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
 
46
  }
47
  },
48
  "total_flos": 0,
49
+ "train_batch_size": 32,
50
  "trial_name": null,
51
  "trial_params": {
52
+ "dropout_rate": 0.20824535211573575,
53
+ "learning_rate": 2.1654651991046377e-05,
54
+ "max_length": 128,
55
+ "num_train_epochs": 6,
56
+ "per_device_train_batch_size": 32
 
57
  }
58
  }
run-1/checkpoint-156/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49857bf60516390ac64bdbd74b4e19cf501005742d1303351a514c72c9f73bcc
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d3f3ef87c93cc8393d6c6caeddd95144003bfa8d2fe1483588901c4b973a6e
3
  size 5432
run-1/checkpoint-312/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_global_step": 312,
3
- "best_metric": 0.6137184115523465,
4
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-312",
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
  "global_step": 312,
8
  "is_hyper_param_search": true,
@@ -11,27 +11,45 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.49458483754512633,
15
- "eval_loss": 0.6931370496749878,
16
- "eval_runtime": 1.6099,
17
- "eval_samples_per_second": 172.065,
18
- "eval_steps_per_second": 3.106,
19
- "step": 156
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.6137184115523465,
24
- "eval_loss": 0.6719065308570862,
25
- "eval_runtime": 1.7819,
26
- "eval_samples_per_second": 155.451,
27
- "eval_steps_per_second": 2.806,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "step": 312
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 780,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 5,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
@@ -46,14 +64,13 @@
46
  }
47
  },
48
  "total_flos": 0,
49
- "train_batch_size": 16,
50
  "trial_name": null,
51
  "trial_params": {
52
- "dropout_rate": 0.34335779036160474,
53
- "learning_rate": 3.2907757924840336e-05,
54
- "num_train_epochs": 5,
55
- "per_device_train_batch_size": 16,
56
- "warmup_steps": 7,
57
- "weight_decay": 0.011718685414344022
58
  }
59
  }
 
1
  {
2
  "best_global_step": 312,
3
+ "best_metric": 0.6101083032490975,
4
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-312",
5
+ "epoch": 4.0,
6
  "eval_steps": 500,
7
  "global_step": 312,
8
  "is_hyper_param_search": true,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.5126353790613718,
15
+ "eval_loss": 0.6928116083145142,
16
+ "eval_runtime": 1.6157,
17
+ "eval_samples_per_second": 171.444,
18
+ "eval_steps_per_second": 3.095,
19
+ "step": 78
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.5415162454873647,
24
+ "eval_loss": 0.6922104954719543,
25
+ "eval_runtime": 1.6727,
26
+ "eval_samples_per_second": 165.597,
27
+ "eval_steps_per_second": 2.989,
28
+ "step": 156
29
+ },
30
+ {
31
+ "epoch": 3.0,
32
+ "eval_accuracy": 0.6028880866425993,
33
+ "eval_loss": 0.675690770149231,
34
+ "eval_runtime": 1.7034,
35
+ "eval_samples_per_second": 162.612,
36
+ "eval_steps_per_second": 2.935,
37
+ "step": 234
38
+ },
39
+ {
40
+ "epoch": 4.0,
41
+ "eval_accuracy": 0.6101083032490975,
42
+ "eval_loss": 0.7228259444236755,
43
+ "eval_runtime": 1.7254,
44
+ "eval_samples_per_second": 160.54,
45
+ "eval_steps_per_second": 2.898,
46
  "step": 312
47
  }
48
  ],
49
  "logging_steps": 500,
50
+ "max_steps": 468,
51
  "num_input_tokens_seen": 0,
52
+ "num_train_epochs": 6,
53
  "save_steps": 500,
54
  "stateful_callbacks": {
55
  "TrainerControl": {
 
64
  }
65
  },
66
  "total_flos": 0,
67
+ "train_batch_size": 32,
68
  "trial_name": null,
69
  "trial_params": {
70
+ "dropout_rate": 0.20824535211573575,
71
+ "learning_rate": 2.1654651991046377e-05,
72
+ "max_length": 128,
73
+ "num_train_epochs": 6,
74
+ "per_device_train_batch_size": 32
 
75
  }
76
  }
run-1/checkpoint-468/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_global_step": 312,
3
- "best_metric": 0.6137184115523465,
4
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-312",
5
- "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 468,
8
  "is_hyper_param_search": true,
@@ -11,36 +11,63 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.49458483754512633,
15
- "eval_loss": 0.6931370496749878,
16
- "eval_runtime": 1.6099,
17
- "eval_samples_per_second": 172.065,
18
- "eval_steps_per_second": 3.106,
19
- "step": 156
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.6137184115523465,
24
- "eval_loss": 0.6719065308570862,
25
- "eval_runtime": 1.7819,
26
- "eval_samples_per_second": 155.451,
27
- "eval_steps_per_second": 2.806,
28
- "step": 312
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.6137184115523465,
33
- "eval_loss": 0.7334751486778259,
34
- "eval_runtime": 1.7429,
35
- "eval_samples_per_second": 158.932,
36
- "eval_steps_per_second": 2.869,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "step": 468
38
  }
39
  ],
40
  "logging_steps": 500,
41
- "max_steps": 780,
42
  "num_input_tokens_seen": 0,
43
- "num_train_epochs": 5,
44
  "save_steps": 500,
45
  "stateful_callbacks": {
46
  "TrainerControl": {
@@ -49,20 +76,19 @@
49
  "should_evaluate": false,
50
  "should_log": false,
51
  "should_save": true,
52
- "should_training_stop": false
53
  },
54
  "attributes": {}
55
  }
56
  },
57
  "total_flos": 0,
58
- "train_batch_size": 16,
59
  "trial_name": null,
60
  "trial_params": {
61
- "dropout_rate": 0.34335779036160474,
62
- "learning_rate": 3.2907757924840336e-05,
63
- "num_train_epochs": 5,
64
- "per_device_train_batch_size": 16,
65
- "warmup_steps": 7,
66
- "weight_decay": 0.011718685414344022
67
  }
68
  }
 
1
  {
2
  "best_global_step": 312,
3
+ "best_metric": 0.6101083032490975,
4
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-312",
5
+ "epoch": 6.0,
6
  "eval_steps": 500,
7
  "global_step": 468,
8
  "is_hyper_param_search": true,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.5126353790613718,
15
+ "eval_loss": 0.6928116083145142,
16
+ "eval_runtime": 1.6157,
17
+ "eval_samples_per_second": 171.444,
18
+ "eval_steps_per_second": 3.095,
19
+ "step": 78
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.5415162454873647,
24
+ "eval_loss": 0.6922104954719543,
25
+ "eval_runtime": 1.6727,
26
+ "eval_samples_per_second": 165.597,
27
+ "eval_steps_per_second": 2.989,
28
+ "step": 156
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.6028880866425993,
33
+ "eval_loss": 0.675690770149231,
34
+ "eval_runtime": 1.7034,
35
+ "eval_samples_per_second": 162.612,
36
+ "eval_steps_per_second": 2.935,
37
+ "step": 234
38
+ },
39
+ {
40
+ "epoch": 4.0,
41
+ "eval_accuracy": 0.6101083032490975,
42
+ "eval_loss": 0.7228259444236755,
43
+ "eval_runtime": 1.7254,
44
+ "eval_samples_per_second": 160.54,
45
+ "eval_steps_per_second": 2.898,
46
+ "step": 312
47
+ },
48
+ {
49
+ "epoch": 5.0,
50
+ "eval_accuracy": 0.6028880866425993,
51
+ "eval_loss": 0.8196464776992798,
52
+ "eval_runtime": 1.753,
53
+ "eval_samples_per_second": 158.017,
54
+ "eval_steps_per_second": 2.852,
55
+ "step": 390
56
+ },
57
+ {
58
+ "epoch": 6.0,
59
+ "eval_accuracy": 0.6064981949458483,
60
+ "eval_loss": 0.8347622156143188,
61
+ "eval_runtime": 1.7803,
62
+ "eval_samples_per_second": 155.595,
63
+ "eval_steps_per_second": 2.809,
64
  "step": 468
65
  }
66
  ],
67
  "logging_steps": 500,
68
+ "max_steps": 468,
69
  "num_input_tokens_seen": 0,
70
+ "num_train_epochs": 6,
71
  "save_steps": 500,
72
  "stateful_callbacks": {
73
  "TrainerControl": {
 
76
  "should_evaluate": false,
77
  "should_log": false,
78
  "should_save": true,
79
+ "should_training_stop": true
80
  },
81
  "attributes": {}
82
  }
83
  },
84
  "total_flos": 0,
85
+ "train_batch_size": 32,
86
  "trial_name": null,
87
  "trial_params": {
88
+ "dropout_rate": 0.20824535211573575,
89
+ "learning_rate": 2.1654651991046377e-05,
90
+ "max_length": 128,
91
+ "num_train_epochs": 6,
92
+ "per_device_train_batch_size": 32
 
93
  }
94
  }
run-1/checkpoint-78/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1dd982c1b553fdbc9b8a952e55d40e0d9cc04e480bf34f82e10d6a4ee181d7c
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c3867cd6186c0f0fdda1b2286dbb5072fbb55d0c7c1781ce71197da52eaa00
3
  size 267832560
run-1/checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16db6ef328621a03925d474c105d56d3a7a135c9e1fc5f110eae061fd10cdbf1
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb6279516369d1f42661dc60594da50d063d4e57ff8a5753461560cf4ccc592
3
  size 535727290
run-1/checkpoint-78/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:867b92593652105b5f08fe70d622012e55758c900eaf343351c736e7b1b4f3b7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e987394b080855104b2fc9e8a2a35b19f86d7761afc384f7d53b7c8b88c979
3
  size 14244
run-1/checkpoint-78/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb86949775bdf4d5e2bdaa6717a36cdf0553b78032ab7e03d20cb7dce25cdd05
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add46fb887d204aa07c58146414bb54dad8684c70c657f66e4e33b99005a5e26
3
  size 1064
run-1/checkpoint-78/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_global_step": 39,
3
- "best_metric": 0.5487364620938628,
4
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-39",
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
  "global_step": 78,
8
  "is_hyper_param_search": true,
@@ -11,27 +11,18 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5487364620938628,
15
- "eval_loss": 0.6909578442573547,
16
- "eval_runtime": 1.5004,
17
- "eval_samples_per_second": 184.621,
18
- "eval_steps_per_second": 3.333,
19
- "step": 39
20
- },
21
- {
22
- "epoch": 2.0,
23
- "eval_accuracy": 0.5415162454873647,
24
- "eval_loss": 0.6907435655593872,
25
- "eval_runtime": 1.4749,
26
- "eval_samples_per_second": 187.809,
27
- "eval_steps_per_second": 3.39,
28
  "step": 78
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 78,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 2,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
@@ -40,17 +31,19 @@
40
  "should_evaluate": false,
41
  "should_log": false,
42
  "should_save": true,
43
- "should_training_stop": true
44
  },
45
  "attributes": {}
46
  }
47
  },
48
  "total_flos": 0,
49
- "train_batch_size": 64,
50
  "trial_name": null,
51
  "trial_params": {
52
- "learning_rate": 2.8881628279456785e-06,
53
- "num_train_epochs": 2,
54
- "per_device_train_batch_size": 64
 
 
55
  }
56
  }
 
1
  {
2
+ "best_global_step": 78,
3
+ "best_metric": 0.5126353790613718,
4
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-78",
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
  "global_step": 78,
8
  "is_hyper_param_search": true,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.5126353790613718,
15
+ "eval_loss": 0.6928116083145142,
16
+ "eval_runtime": 1.6157,
17
+ "eval_samples_per_second": 171.444,
18
+ "eval_steps_per_second": 3.095,
 
 
 
 
 
 
 
 
 
19
  "step": 78
20
  }
21
  ],
22
  "logging_steps": 500,
23
+ "max_steps": 468,
24
  "num_input_tokens_seen": 0,
25
+ "num_train_epochs": 6,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "TrainerControl": {
 
31
  "should_evaluate": false,
32
  "should_log": false,
33
  "should_save": true,
34
+ "should_training_stop": false
35
  },
36
  "attributes": {}
37
  }
38
  },
39
  "total_flos": 0,
40
+ "train_batch_size": 32,
41
  "trial_name": null,
42
  "trial_params": {
43
+ "dropout_rate": 0.20824535211573575,
44
+ "learning_rate": 2.1654651991046377e-05,
45
+ "max_length": 128,
46
+ "num_train_epochs": 6,
47
+ "per_device_train_batch_size": 32
48
  }
49
  }
run-1/checkpoint-78/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9941fba4fa1cff2a9aead33409263ffd88455374346eacff95cdb699559ad3a2
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d3f3ef87c93cc8393d6c6caeddd95144003bfa8d2fe1483588901c4b973a6e
3
  size 5432
run-14/checkpoint-78/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "initializer_range": 0.02,
11
+ "max_position_embeddings": 512,
12
+ "model_type": "distilbert",
13
+ "n_heads": 12,
14
+ "n_layers": 6,
15
+ "pad_token_id": 0,
16
+ "problem_type": "single_label_classification",
17
+ "qa_dropout": 0.1,
18
+ "seq_classif_dropout": 0.2,
19
+ "sinusoidal_pos_embds": false,
20
+ "tie_weights_": true,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "vocab_size": 30522
24
+ }
run-14/checkpoint-78/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9255c1aa08f2793046152f416df11bd80559ef9ea1e0557df60f36bedc6e91f9
3
+ size 267832560
run-14/checkpoint-78/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b61d22b7b46522e10f1520816173ba7a938c922fb0f9bcf3820919d5cf91e57
3
+ size 535727290
run-14/checkpoint-78/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867b92593652105b5f08fe70d622012e55758c900eaf343351c736e7b1b4f3b7
3
+ size 14244
run-14/checkpoint-78/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:100a001fd73a2ba0549b8cca6f0f390a0f4ae4d0f5358e55dec6aef4ad278851
3
+ size 1064
run-14/checkpoint-78/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-14/checkpoint-78/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-14/checkpoint-78/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-14/checkpoint-78/trainer_state.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 78,
3
+ "best_metric": 0.5703971119133574,
4
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-rte-run_3/run-14/checkpoint-78",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 78,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.5667870036101083,
15
+ "eval_loss": 0.6876252889633179,
16
+ "eval_runtime": 1.7048,
17
+ "eval_samples_per_second": 162.48,
18
+ "eval_steps_per_second": 2.933,
19
+ "step": 39
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_accuracy": 0.5703971119133574,
24
+ "eval_loss": 0.6807517409324646,
25
+ "eval_runtime": 1.7318,
26
+ "eval_samples_per_second": 159.948,
27
+ "eval_steps_per_second": 2.887,
28
+ "step": 78
29
+ }
30
+ ],
31
+ "logging_steps": 500,
32
+ "max_steps": 234,
33
+ "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 6,
35
+ "save_steps": 500,
36
+ "stateful_callbacks": {
37
+ "TrainerControl": {
38
+ "args": {
39
+ "should_epoch_stop": false,
40
+ "should_evaluate": false,
41
+ "should_log": false,
42
+ "should_save": true,
43
+ "should_training_stop": false
44
+ },
45
+ "attributes": {}
46
+ }
47
+ },
48
+ "total_flos": 0,
49
+ "train_batch_size": 64,
50
+ "trial_name": null,
51
+ "trial_params": {
52
+ "dropout_rate": 0.3994557877518572,
53
+ "learning_rate": 2.1686743677849786e-05,
54
+ "num_train_epochs": 6,
55
+ "per_device_train_batch_size": 64,
56
+ "warmup_steps": 43,
57
+ "weight_decay": 0.03886015775146907
58
+ }
59
+ }
run-14/checkpoint-78/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93cf3e2688a47066a6179d8c03b9819776b74f37d7be7c152adb78bb2ef5fc54
3
+ size 5432
run-14/checkpoint-78/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-15/checkpoint-117/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "initializer_range": 0.02,
11
+ "max_position_embeddings": 512,
12
+ "model_type": "distilbert",
13
+ "n_heads": 12,
14
+ "n_layers": 6,
15
+ "pad_token_id": 0,
16
+ "problem_type": "single_label_classification",
17
+ "qa_dropout": 0.1,
18
+ "seq_classif_dropout": 0.2,
19
+ "sinusoidal_pos_embds": false,
20
+ "tie_weights_": true,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "vocab_size": 30522
24
+ }
run-15/checkpoint-117/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e645faf68fcad32039a193b1ed2a0cba07856ff74ed3b370c63140833bfe7b77
3
+ size 267832560
run-15/checkpoint-39/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "initializer_range": 0.02,
11
+ "max_position_embeddings": 512,
12
+ "model_type": "distilbert",
13
+ "n_heads": 12,
14
+ "n_layers": 6,
15
+ "pad_token_id": 0,
16
+ "problem_type": "single_label_classification",
17
+ "qa_dropout": 0.1,
18
+ "seq_classif_dropout": 0.2,
19
+ "sinusoidal_pos_embds": false,
20
+ "tie_weights_": true,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "vocab_size": 30522
24
+ }
run-15/checkpoint-39/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e8c12dc3f66123eab2367a79cd511d6b837cdeb7e30137ed49309c6b5b97b42
3
+ size 267832560
run-15/checkpoint-39/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b82f4235e2b9f409dcc0a3fbfd06e28192a39d5ebc232065d985ce39e175dc5f
3
+ size 535727290
run-15/checkpoint-39/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec26492c01f8533e964548678a9125c8479f466e501c830c64a60759451765d
3
+ size 14244
run-15/checkpoint-39/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb107eb384085ed0d49f9d397f7e0b79d61dbe3389ce8f1927c4dd2e29b35555
3
+ size 1064