CortexPE committed on
Commit
81f0308
·
verified ·
1 Parent(s): 8f080a4

Upload folder using huggingface_hub

Browse files
checkpoint-1750/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "label2id": {
22
+ "LABEL_0": 0,
23
+ "LABEL_1": 1,
24
+ "LABEL_2": 2,
25
+ "LABEL_3": 3,
26
+ "LABEL_4": 4,
27
+ "LABEL_5": 5,
28
+ "LABEL_6": 6
29
+ },
30
+ "max_position_embeddings": 512,
31
+ "model_type": "distilbert",
32
+ "n_heads": 12,
33
+ "n_layers": 6,
34
+ "pad_token_id": 0,
35
+ "problem_type": "single_label_classification",
36
+ "qa_dropout": 0.1,
37
+ "seq_classif_dropout": 0.2,
38
+ "sinusoidal_pos_embds": false,
39
+ "tie_weights_": true,
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.49.0",
42
+ "vocab_size": 30522
43
+ }
checkpoint-1750/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1d894ae4b9d5cc26ee8be188e69a18d10101e4dac56c66b2e2c54b9ea354e7c
3
+ size 267847948
checkpoint-1750/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d1e3fb5918c9d4336aa7864deef0a4686ba6690f773e7171d90490ec93876f9
3
+ size 535758010
checkpoint-1750/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:978f7bab02af7657e390514650a4953fe69eab70770f065bdbb10941acb77c40
3
+ size 14244
checkpoint-1750/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddfaf0c262bfe460a222956722661d67bbbdf55c68aa1429f43b600fd33ec0d
3
+ size 1064
checkpoint-1750/trainer_state.json ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9856031417621324,
3
+ "best_model_checkpoint": "distilbert-type-classifier/checkpoint-1750",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1750,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11428571428571428,
13
+ "grad_norm": 1.4620862007141113,
14
+ "learning_rate": 1.923809523809524e-05,
15
+ "loss": 0.8914,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.22857142857142856,
20
+ "grad_norm": 0.9015448093414307,
21
+ "learning_rate": 1.8476190476190478e-05,
22
+ "loss": 0.186,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.34285714285714286,
27
+ "grad_norm": 0.17388969659805298,
28
+ "learning_rate": 1.7714285714285717e-05,
29
+ "loss": 0.0844,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.45714285714285713,
34
+ "grad_norm": 5.441539764404297,
35
+ "learning_rate": 1.6952380952380955e-05,
36
+ "loss": 0.0738,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.5714285714285714,
41
+ "grad_norm": 0.07328463345766068,
42
+ "learning_rate": 1.6190476190476193e-05,
43
+ "loss": 0.0524,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.6857142857142857,
48
+ "grad_norm": 0.07699141651391983,
49
+ "learning_rate": 1.542857142857143e-05,
50
+ "loss": 0.0567,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.8,
55
+ "grad_norm": 13.823246002197266,
56
+ "learning_rate": 1.4666666666666666e-05,
57
+ "loss": 0.0444,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.9142857142857143,
62
+ "grad_norm": 0.07251616567373276,
63
+ "learning_rate": 1.3904761904761905e-05,
64
+ "loss": 0.0375,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "eval_accuracy": 0.9849949983327776,
70
+ "eval_f1": 0.9845642879906822,
71
+ "eval_loss": 0.07195836305618286,
72
+ "eval_precision": 0.9850021393367597,
73
+ "eval_recall": 0.9849949983327776,
74
+ "eval_runtime": 11.3969,
75
+ "eval_samples_per_second": 263.142,
76
+ "eval_steps_per_second": 16.496,
77
+ "step": 875
78
+ },
79
+ {
80
+ "epoch": 1.0285714285714285,
81
+ "grad_norm": 0.15092064440250397,
82
+ "learning_rate": 1.3142857142857145e-05,
83
+ "loss": 0.0487,
84
+ "step": 900
85
+ },
86
+ {
87
+ "epoch": 1.1428571428571428,
88
+ "grad_norm": 0.019707441329956055,
89
+ "learning_rate": 1.2380952380952383e-05,
90
+ "loss": 0.0295,
91
+ "step": 1000
92
+ },
93
+ {
94
+ "epoch": 1.2571428571428571,
95
+ "grad_norm": 0.019482087343931198,
96
+ "learning_rate": 1.1619047619047621e-05,
97
+ "loss": 0.019,
98
+ "step": 1100
99
+ },
100
+ {
101
+ "epoch": 1.3714285714285714,
102
+ "grad_norm": 0.017313998192548752,
103
+ "learning_rate": 1.0857142857142858e-05,
104
+ "loss": 0.0303,
105
+ "step": 1200
106
+ },
107
+ {
108
+ "epoch": 1.4857142857142858,
109
+ "grad_norm": 0.02844456024467945,
110
+ "learning_rate": 1.0095238095238096e-05,
111
+ "loss": 0.0188,
112
+ "step": 1300
113
+ },
114
+ {
115
+ "epoch": 1.6,
116
+ "grad_norm": 0.3132454454898834,
117
+ "learning_rate": 9.333333333333334e-06,
118
+ "loss": 0.0148,
119
+ "step": 1400
120
+ },
121
+ {
122
+ "epoch": 1.7142857142857144,
123
+ "grad_norm": 0.033423177897930145,
124
+ "learning_rate": 8.571428571428571e-06,
125
+ "loss": 0.0216,
126
+ "step": 1500
127
+ },
128
+ {
129
+ "epoch": 1.8285714285714287,
130
+ "grad_norm": 0.012587839737534523,
131
+ "learning_rate": 7.809523809523811e-06,
132
+ "loss": 0.0099,
133
+ "step": 1600
134
+ },
135
+ {
136
+ "epoch": 1.9428571428571428,
137
+ "grad_norm": 0.010759529657661915,
138
+ "learning_rate": 7.047619047619048e-06,
139
+ "loss": 0.0089,
140
+ "step": 1700
141
+ },
142
+ {
143
+ "epoch": 2.0,
144
+ "eval_accuracy": 0.9859953317772591,
145
+ "eval_f1": 0.9856031417621324,
146
+ "eval_loss": 0.06867047399282455,
147
+ "eval_precision": 0.9859730777105192,
148
+ "eval_recall": 0.9859953317772591,
149
+ "eval_runtime": 11.4007,
150
+ "eval_samples_per_second": 263.055,
151
+ "eval_steps_per_second": 16.49,
152
+ "step": 1750
153
+ }
154
+ ],
155
+ "logging_steps": 100,
156
+ "max_steps": 2625,
157
+ "num_input_tokens_seen": 0,
158
+ "num_train_epochs": 3,
159
+ "save_steps": 500,
160
+ "stateful_callbacks": {
161
+ "EarlyStoppingCallback": {
162
+ "args": {
163
+ "early_stopping_patience": 2,
164
+ "early_stopping_threshold": 0.0
165
+ },
166
+ "attributes": {
167
+ "early_stopping_patience_counter": 0
168
+ }
169
+ },
170
+ "TrainerControl": {
171
+ "args": {
172
+ "should_epoch_stop": false,
173
+ "should_evaluate": false,
174
+ "should_log": false,
175
+ "should_save": true,
176
+ "should_training_stop": false
177
+ },
178
+ "attributes": {}
179
+ }
180
+ },
181
+ "total_flos": 927155754656256.0,
182
+ "train_batch_size": 16,
183
+ "trial_name": null,
184
+ "trial_params": null
185
+ }
checkpoint-1750/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:609d9e093ff10cb72ff2ceab0b60b959ffb5a8e69641d5f5afc035f09f45f51b
3
+ size 5304
checkpoint-2625/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "label2id": {
22
+ "LABEL_0": 0,
23
+ "LABEL_1": 1,
24
+ "LABEL_2": 2,
25
+ "LABEL_3": 3,
26
+ "LABEL_4": 4,
27
+ "LABEL_5": 5,
28
+ "LABEL_6": 6
29
+ },
30
+ "max_position_embeddings": 512,
31
+ "model_type": "distilbert",
32
+ "n_heads": 12,
33
+ "n_layers": 6,
34
+ "pad_token_id": 0,
35
+ "problem_type": "single_label_classification",
36
+ "qa_dropout": 0.1,
37
+ "seq_classif_dropout": 0.2,
38
+ "sinusoidal_pos_embds": false,
39
+ "tie_weights_": true,
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.49.0",
42
+ "vocab_size": 30522
43
+ }
checkpoint-2625/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f122579f057d33dc7f447e9da1748ae33d185fe72c716a49c6d7a9d7edb170f
3
+ size 267847948
checkpoint-2625/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efeb65ae52e78add1ec6957140de6b43be8937777687cbab5141356e39975545
3
+ size 535758010
checkpoint-2625/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08bb7394ebf849123be4226fab5ffe87e4adaf61e755aa694735f27486d7d221
3
+ size 14244
checkpoint-2625/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84decddedc343afdfe65c5401b1ea8f21003cfa643ae145ae2bd45c9f7512652
3
+ size 1064
checkpoint-2625/trainer_state.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9902624352671658,
3
+ "best_model_checkpoint": "distilbert-type-classifier/checkpoint-2625",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2625,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11428571428571428,
13
+ "grad_norm": 1.4620862007141113,
14
+ "learning_rate": 1.923809523809524e-05,
15
+ "loss": 0.8914,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.22857142857142856,
20
+ "grad_norm": 0.9015448093414307,
21
+ "learning_rate": 1.8476190476190478e-05,
22
+ "loss": 0.186,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.34285714285714286,
27
+ "grad_norm": 0.17388969659805298,
28
+ "learning_rate": 1.7714285714285717e-05,
29
+ "loss": 0.0844,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.45714285714285713,
34
+ "grad_norm": 5.441539764404297,
35
+ "learning_rate": 1.6952380952380955e-05,
36
+ "loss": 0.0738,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.5714285714285714,
41
+ "grad_norm": 0.07328463345766068,
42
+ "learning_rate": 1.6190476190476193e-05,
43
+ "loss": 0.0524,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.6857142857142857,
48
+ "grad_norm": 0.07699141651391983,
49
+ "learning_rate": 1.542857142857143e-05,
50
+ "loss": 0.0567,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.8,
55
+ "grad_norm": 13.823246002197266,
56
+ "learning_rate": 1.4666666666666666e-05,
57
+ "loss": 0.0444,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.9142857142857143,
62
+ "grad_norm": 0.07251616567373276,
63
+ "learning_rate": 1.3904761904761905e-05,
64
+ "loss": 0.0375,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "eval_accuracy": 0.9849949983327776,
70
+ "eval_f1": 0.9845642879906822,
71
+ "eval_loss": 0.07195836305618286,
72
+ "eval_precision": 0.9850021393367597,
73
+ "eval_recall": 0.9849949983327776,
74
+ "eval_runtime": 11.3969,
75
+ "eval_samples_per_second": 263.142,
76
+ "eval_steps_per_second": 16.496,
77
+ "step": 875
78
+ },
79
+ {
80
+ "epoch": 1.0285714285714285,
81
+ "grad_norm": 0.15092064440250397,
82
+ "learning_rate": 1.3142857142857145e-05,
83
+ "loss": 0.0487,
84
+ "step": 900
85
+ },
86
+ {
87
+ "epoch": 1.1428571428571428,
88
+ "grad_norm": 0.019707441329956055,
89
+ "learning_rate": 1.2380952380952383e-05,
90
+ "loss": 0.0295,
91
+ "step": 1000
92
+ },
93
+ {
94
+ "epoch": 1.2571428571428571,
95
+ "grad_norm": 0.019482087343931198,
96
+ "learning_rate": 1.1619047619047621e-05,
97
+ "loss": 0.019,
98
+ "step": 1100
99
+ },
100
+ {
101
+ "epoch": 1.3714285714285714,
102
+ "grad_norm": 0.017313998192548752,
103
+ "learning_rate": 1.0857142857142858e-05,
104
+ "loss": 0.0303,
105
+ "step": 1200
106
+ },
107
+ {
108
+ "epoch": 1.4857142857142858,
109
+ "grad_norm": 0.02844456024467945,
110
+ "learning_rate": 1.0095238095238096e-05,
111
+ "loss": 0.0188,
112
+ "step": 1300
113
+ },
114
+ {
115
+ "epoch": 1.6,
116
+ "grad_norm": 0.3132454454898834,
117
+ "learning_rate": 9.333333333333334e-06,
118
+ "loss": 0.0148,
119
+ "step": 1400
120
+ },
121
+ {
122
+ "epoch": 1.7142857142857144,
123
+ "grad_norm": 0.033423177897930145,
124
+ "learning_rate": 8.571428571428571e-06,
125
+ "loss": 0.0216,
126
+ "step": 1500
127
+ },
128
+ {
129
+ "epoch": 1.8285714285714287,
130
+ "grad_norm": 0.012587839737534523,
131
+ "learning_rate": 7.809523809523811e-06,
132
+ "loss": 0.0099,
133
+ "step": 1600
134
+ },
135
+ {
136
+ "epoch": 1.9428571428571428,
137
+ "grad_norm": 0.010759529657661915,
138
+ "learning_rate": 7.047619047619048e-06,
139
+ "loss": 0.0089,
140
+ "step": 1700
141
+ },
142
+ {
143
+ "epoch": 2.0,
144
+ "eval_accuracy": 0.9859953317772591,
145
+ "eval_f1": 0.9856031417621324,
146
+ "eval_loss": 0.06867047399282455,
147
+ "eval_precision": 0.9859730777105192,
148
+ "eval_recall": 0.9859953317772591,
149
+ "eval_runtime": 11.4007,
150
+ "eval_samples_per_second": 263.055,
151
+ "eval_steps_per_second": 16.49,
152
+ "step": 1750
153
+ },
154
+ {
155
+ "epoch": 2.057142857142857,
156
+ "grad_norm": 0.021305246278643608,
157
+ "learning_rate": 6.285714285714286e-06,
158
+ "loss": 0.0126,
159
+ "step": 1800
160
+ },
161
+ {
162
+ "epoch": 2.1714285714285713,
163
+ "grad_norm": 0.012538880109786987,
164
+ "learning_rate": 5.523809523809525e-06,
165
+ "loss": 0.007,
166
+ "step": 1900
167
+ },
168
+ {
169
+ "epoch": 2.2857142857142856,
170
+ "grad_norm": 0.009147515520453453,
171
+ "learning_rate": 4.761904761904762e-06,
172
+ "loss": 0.0108,
173
+ "step": 2000
174
+ },
175
+ {
176
+ "epoch": 2.4,
177
+ "grad_norm": 0.053507789969444275,
178
+ "learning_rate": 4.000000000000001e-06,
179
+ "loss": 0.0009,
180
+ "step": 2100
181
+ },
182
+ {
183
+ "epoch": 2.5142857142857142,
184
+ "grad_norm": 0.02192852832376957,
185
+ "learning_rate": 3.2380952380952385e-06,
186
+ "loss": 0.0025,
187
+ "step": 2200
188
+ },
189
+ {
190
+ "epoch": 2.6285714285714286,
191
+ "grad_norm": 5.773326396942139,
192
+ "learning_rate": 2.4761904761904764e-06,
193
+ "loss": 0.0092,
194
+ "step": 2300
195
+ },
196
+ {
197
+ "epoch": 2.742857142857143,
198
+ "grad_norm": 0.007939064875245094,
199
+ "learning_rate": 1.7142857142857145e-06,
200
+ "loss": 0.0007,
201
+ "step": 2400
202
+ },
203
+ {
204
+ "epoch": 2.857142857142857,
205
+ "grad_norm": 0.008160616271197796,
206
+ "learning_rate": 9.523809523809525e-07,
207
+ "loss": 0.0044,
208
+ "step": 2500
209
+ },
210
+ {
211
+ "epoch": 2.9714285714285715,
212
+ "grad_norm": 0.009465965442359447,
213
+ "learning_rate": 1.904761904761905e-07,
214
+ "loss": 0.0085,
215
+ "step": 2600
216
+ },
217
+ {
218
+ "epoch": 3.0,
219
+ "eval_accuracy": 0.9903301100366789,
220
+ "eval_f1": 0.9902624352671658,
221
+ "eval_loss": 0.05811132863163948,
222
+ "eval_precision": 0.9902595291145613,
223
+ "eval_recall": 0.9903301100366789,
224
+ "eval_runtime": 11.257,
225
+ "eval_samples_per_second": 266.413,
226
+ "eval_steps_per_second": 16.701,
227
+ "step": 2625
228
+ }
229
+ ],
230
+ "logging_steps": 100,
231
+ "max_steps": 2625,
232
+ "num_input_tokens_seen": 0,
233
+ "num_train_epochs": 3,
234
+ "save_steps": 500,
235
+ "stateful_callbacks": {
236
+ "EarlyStoppingCallback": {
237
+ "args": {
238
+ "early_stopping_patience": 2,
239
+ "early_stopping_threshold": 0.0
240
+ },
241
+ "attributes": {
242
+ "early_stopping_patience_counter": 0
243
+ }
244
+ },
245
+ "TrainerControl": {
246
+ "args": {
247
+ "should_epoch_stop": false,
248
+ "should_evaluate": false,
249
+ "should_log": false,
250
+ "should_save": true,
251
+ "should_training_stop": true
252
+ },
253
+ "attributes": {}
254
+ }
255
+ },
256
+ "total_flos": 1390733631984384.0,
257
+ "train_batch_size": 16,
258
+ "trial_name": null,
259
+ "trial_params": null
260
+ }
checkpoint-2625/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:609d9e093ff10cb72ff2ceab0b60b959ffb5a8e69641d5f5afc035f09f45f51b
3
+ size 5304
checkpoint-875/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "label2id": {
22
+ "LABEL_0": 0,
23
+ "LABEL_1": 1,
24
+ "LABEL_2": 2,
25
+ "LABEL_3": 3,
26
+ "LABEL_4": 4,
27
+ "LABEL_5": 5,
28
+ "LABEL_6": 6
29
+ },
30
+ "max_position_embeddings": 512,
31
+ "model_type": "distilbert",
32
+ "n_heads": 12,
33
+ "n_layers": 6,
34
+ "pad_token_id": 0,
35
+ "problem_type": "single_label_classification",
36
+ "qa_dropout": 0.1,
37
+ "seq_classif_dropout": 0.2,
38
+ "sinusoidal_pos_embds": false,
39
+ "tie_weights_": true,
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.49.0",
42
+ "vocab_size": 30522
43
+ }
checkpoint-875/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ba872f979cf3b152ea5b9aecf016cfd0d69ae35545b63ba7eb4c7b9ac2fc8c
3
+ size 267847948
checkpoint-875/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f4a1538cc2e2669793b1fe565f860f8e377be35a7a70bb012c2a4d3c4ec1d09
3
+ size 535758010
checkpoint-875/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec995f4ba70e374efef67b4a374fa81d531514c9ed99243e50bd5409be012bef
3
+ size 14244
checkpoint-875/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e0f293fb265866b04cf2e58f1795f83a3571bad9174a30158203f83d1c29522
3
+ size 1064
checkpoint-875/trainer_state.json ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9845642879906822,
3
+ "best_model_checkpoint": "distilbert-type-classifier/checkpoint-875",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 875,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11428571428571428,
13
+ "grad_norm": 1.4620862007141113,
14
+ "learning_rate": 1.923809523809524e-05,
15
+ "loss": 0.8914,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.22857142857142856,
20
+ "grad_norm": 0.9015448093414307,
21
+ "learning_rate": 1.8476190476190478e-05,
22
+ "loss": 0.186,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.34285714285714286,
27
+ "grad_norm": 0.17388969659805298,
28
+ "learning_rate": 1.7714285714285717e-05,
29
+ "loss": 0.0844,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.45714285714285713,
34
+ "grad_norm": 5.441539764404297,
35
+ "learning_rate": 1.6952380952380955e-05,
36
+ "loss": 0.0738,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.5714285714285714,
41
+ "grad_norm": 0.07328463345766068,
42
+ "learning_rate": 1.6190476190476193e-05,
43
+ "loss": 0.0524,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.6857142857142857,
48
+ "grad_norm": 0.07699141651391983,
49
+ "learning_rate": 1.542857142857143e-05,
50
+ "loss": 0.0567,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.8,
55
+ "grad_norm": 13.823246002197266,
56
+ "learning_rate": 1.4666666666666666e-05,
57
+ "loss": 0.0444,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.9142857142857143,
62
+ "grad_norm": 0.07251616567373276,
63
+ "learning_rate": 1.3904761904761905e-05,
64
+ "loss": 0.0375,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "eval_accuracy": 0.9849949983327776,
70
+ "eval_f1": 0.9845642879906822,
71
+ "eval_loss": 0.07195836305618286,
72
+ "eval_precision": 0.9850021393367597,
73
+ "eval_recall": 0.9849949983327776,
74
+ "eval_runtime": 11.3969,
75
+ "eval_samples_per_second": 263.142,
76
+ "eval_steps_per_second": 16.496,
77
+ "step": 875
78
+ }
79
+ ],
80
+ "logging_steps": 100,
81
+ "max_steps": 2625,
82
+ "num_input_tokens_seen": 0,
83
+ "num_train_epochs": 3,
84
+ "save_steps": 500,
85
+ "stateful_callbacks": {
86
+ "EarlyStoppingCallback": {
87
+ "args": {
88
+ "early_stopping_patience": 2,
89
+ "early_stopping_threshold": 0.0
90
+ },
91
+ "attributes": {
92
+ "early_stopping_patience_counter": 0
93
+ }
94
+ },
95
+ "TrainerControl": {
96
+ "args": {
97
+ "should_epoch_stop": false,
98
+ "should_evaluate": false,
99
+ "should_log": false,
100
+ "should_save": true,
101
+ "should_training_stop": false
102
+ },
103
+ "attributes": {}
104
+ }
105
+ },
106
+ "total_flos": 463577877328128.0,
107
+ "train_batch_size": 16,
108
+ "trial_name": null,
109
+ "trial_params": null
110
+ }
checkpoint-875/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:609d9e093ff10cb72ff2ceab0b60b959ffb5a8e69641d5f5afc035f09f45f51b
3
+ size 5304
config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4",
17
+ "5": "LABEL_5",
18
+ "6": "LABEL_6"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "label2id": {
22
+ "LABEL_0": 0,
23
+ "LABEL_1": 1,
24
+ "LABEL_2": 2,
25
+ "LABEL_3": 3,
26
+ "LABEL_4": 4,
27
+ "LABEL_5": 5,
28
+ "LABEL_6": 6
29
+ },
30
+ "max_position_embeddings": 512,
31
+ "model_type": "distilbert",
32
+ "n_heads": 12,
33
+ "n_layers": 6,
34
+ "pad_token_id": 0,
35
+ "problem_type": "single_label_classification",
36
+ "qa_dropout": 0.1,
37
+ "seq_classif_dropout": 0.2,
38
+ "sinusoidal_pos_embds": false,
39
+ "tie_weights_": true,
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.49.0",
42
+ "vocab_size": 30522
43
+ }
confusion_matrix.png ADDED
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f122579f057d33dc7f447e9da1748ae33d185fe72c716a49c6d7a9d7edb170f
3
+ size 267847948
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:609d9e093ff10cb72ff2ceab0b60b959ffb5a8e69641d5f5afc035f09f45f51b
3
+ size 5304
type_mapping.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"0": "coherent", "1": "grammatical_errors", "2": "random_bytes", "3": "random_tokens", "4": "random_words", "5": "run_on", "6": "word_soup"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff