uvegesistvan commited on
Commit
c4e6527
·
verified ·
1 Parent(s): 5beb113

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-650/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11"
25
+ },
26
+ "initializer_range": 0.02,
27
+ "intermediate_size": 4096,
28
+ "label2id": {
29
+ "LABEL_0": 0,
30
+ "LABEL_1": 1,
31
+ "LABEL_10": 10,
32
+ "LABEL_11": 11,
33
+ "LABEL_2": 2,
34
+ "LABEL_3": 3,
35
+ "LABEL_4": 4,
36
+ "LABEL_5": 5,
37
+ "LABEL_6": 6,
38
+ "LABEL_7": 7,
39
+ "LABEL_8": 8,
40
+ "LABEL_9": 9
41
+ },
42
+ "layer_norm_eps": 1e-05,
43
+ "max_position_embeddings": 514,
44
+ "model_type": "xlm-roberta",
45
+ "num_attention_heads": 16,
46
+ "num_hidden_layers": 24,
47
+ "output_past": true,
48
+ "pad_token_id": 1,
49
+ "position_embedding_type": "absolute",
50
+ "problem_type": "single_label_classification",
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.50.3",
53
+ "type_vocab_size": 1,
54
+ "use_cache": true,
55
+ "vocab_size": 250002
56
+ }
checkpoint-650/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a61d3d7d4c6e39dc60f95ebcc8ac78cb8907334c82a62302305b29cc691b20d1
3
+ size 2239659672
checkpoint-650/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e43e921848b4160b479d0e3d1069ce07dd246d00a1f0f6b915c6dc641476471
3
+ size 4479554705
checkpoint-650/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d7ff2d3adf149de93fb9af2596af86a4e073e792b944f7a34a8d9e43b7f2e09
3
+ size 14244
checkpoint-650/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc49ce2ed3a78b592179072211a7232b0ec4bf21d989c819aa813a0eee974496
3
+ size 1064
checkpoint-650/trainer_state.json ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 650,
3
+ "best_metric": 1.446366786956787,
4
+ "best_model_checkpoint": "model/checkpoint-650",
5
+ "epoch": 10.0,
6
+ "eval_steps": 100,
7
+ "global_step": 650,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "grad_norm": 18.839448928833008,
15
+ "learning_rate": 9e-06,
16
+ "loss": 2.2193,
17
+ "step": 65
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.296875,
22
+ "eval_f1": 0.13591867469879518,
23
+ "eval_loss": 1.9436771869659424,
24
+ "eval_precision": 0.088134765625,
25
+ "eval_recall": 0.296875,
26
+ "eval_runtime": 0.5308,
27
+ "eval_samples_per_second": 120.581,
28
+ "eval_steps_per_second": 15.073,
29
+ "step": 65
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "grad_norm": 13.428215026855469,
34
+ "learning_rate": 8.000000000000001e-06,
35
+ "loss": 2.0787,
36
+ "step": 130
37
+ },
38
+ {
39
+ "epoch": 2.0,
40
+ "eval_accuracy": 0.34375,
41
+ "eval_f1": 0.2071691176470588,
42
+ "eval_loss": 1.872170329093933,
43
+ "eval_precision": 0.15072278911564627,
44
+ "eval_recall": 0.34375,
45
+ "eval_runtime": 0.5302,
46
+ "eval_samples_per_second": 120.719,
47
+ "eval_steps_per_second": 15.09,
48
+ "step": 130
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "grad_norm": 53.54623031616211,
53
+ "learning_rate": 7e-06,
54
+ "loss": 1.9946,
55
+ "step": 195
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_accuracy": 0.4375,
60
+ "eval_f1": 0.29532839962997226,
61
+ "eval_loss": 1.8184549808502197,
62
+ "eval_precision": 0.2342509920634921,
63
+ "eval_recall": 0.4375,
64
+ "eval_runtime": 0.5335,
65
+ "eval_samples_per_second": 119.973,
66
+ "eval_steps_per_second": 14.997,
67
+ "step": 195
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "grad_norm": 15.883207321166992,
72
+ "learning_rate": 6e-06,
73
+ "loss": 1.8783,
74
+ "step": 260
75
+ },
76
+ {
77
+ "epoch": 4.0,
78
+ "eval_accuracy": 0.421875,
79
+ "eval_f1": 0.3224431818181818,
80
+ "eval_loss": 1.8057754039764404,
81
+ "eval_precision": 0.3003472222222222,
82
+ "eval_recall": 0.421875,
83
+ "eval_runtime": 0.5292,
84
+ "eval_samples_per_second": 120.949,
85
+ "eval_steps_per_second": 15.119,
86
+ "step": 260
87
+ },
88
+ {
89
+ "epoch": 5.0,
90
+ "grad_norm": 52.486629486083984,
91
+ "learning_rate": 5e-06,
92
+ "loss": 1.7463,
93
+ "step": 325
94
+ },
95
+ {
96
+ "epoch": 5.0,
97
+ "eval_accuracy": 0.359375,
98
+ "eval_f1": 0.2792616648992577,
99
+ "eval_loss": 1.7504793405532837,
100
+ "eval_precision": 0.24629103535353536,
101
+ "eval_recall": 0.359375,
102
+ "eval_runtime": 0.5495,
103
+ "eval_samples_per_second": 116.477,
104
+ "eval_steps_per_second": 14.56,
105
+ "step": 325
106
+ },
107
+ {
108
+ "epoch": 6.0,
109
+ "grad_norm": 81.41285705566406,
110
+ "learning_rate": 4.000000000000001e-06,
111
+ "loss": 1.5803,
112
+ "step": 390
113
+ },
114
+ {
115
+ "epoch": 6.0,
116
+ "eval_accuracy": 0.46875,
117
+ "eval_f1": 0.41320569674228214,
118
+ "eval_loss": 1.6302127838134766,
119
+ "eval_precision": 0.3737571022727273,
120
+ "eval_recall": 0.46875,
121
+ "eval_runtime": 0.5313,
122
+ "eval_samples_per_second": 120.469,
123
+ "eval_steps_per_second": 15.059,
124
+ "step": 390
125
+ },
126
+ {
127
+ "epoch": 7.0,
128
+ "grad_norm": 45.92146682739258,
129
+ "learning_rate": 3e-06,
130
+ "loss": 1.3897,
131
+ "step": 455
132
+ },
133
+ {
134
+ "epoch": 7.0,
135
+ "eval_accuracy": 0.453125,
136
+ "eval_f1": 0.41390128968253964,
137
+ "eval_loss": 1.587652325630188,
138
+ "eval_precision": 0.3891989087301588,
139
+ "eval_recall": 0.453125,
140
+ "eval_runtime": 0.5534,
141
+ "eval_samples_per_second": 115.652,
142
+ "eval_steps_per_second": 14.456,
143
+ "step": 455
144
+ },
145
+ {
146
+ "epoch": 8.0,
147
+ "grad_norm": 164.84347534179688,
148
+ "learning_rate": 2.0000000000000003e-06,
149
+ "loss": 1.2265,
150
+ "step": 520
151
+ },
152
+ {
153
+ "epoch": 8.0,
154
+ "eval_accuracy": 0.46875,
155
+ "eval_f1": 0.44462377899877903,
156
+ "eval_loss": 1.482975959777832,
157
+ "eval_precision": 0.4474907904595405,
158
+ "eval_recall": 0.46875,
159
+ "eval_runtime": 0.5315,
160
+ "eval_samples_per_second": 120.403,
161
+ "eval_steps_per_second": 15.05,
162
+ "step": 520
163
+ },
164
+ {
165
+ "epoch": 9.0,
166
+ "grad_norm": 163.72511291503906,
167
+ "learning_rate": 1.0000000000000002e-06,
168
+ "loss": 1.0659,
169
+ "step": 585
170
+ },
171
+ {
172
+ "epoch": 9.0,
173
+ "eval_accuracy": 0.453125,
174
+ "eval_f1": 0.4214488636363637,
175
+ "eval_loss": 1.4710180759429932,
176
+ "eval_precision": 0.41629464285714285,
177
+ "eval_recall": 0.453125,
178
+ "eval_runtime": 0.5304,
179
+ "eval_samples_per_second": 120.672,
180
+ "eval_steps_per_second": 15.084,
181
+ "step": 585
182
+ },
183
+ {
184
+ "epoch": 10.0,
185
+ "grad_norm": 280.70709228515625,
186
+ "learning_rate": 0.0,
187
+ "loss": 0.9751,
188
+ "step": 650
189
+ },
190
+ {
191
+ "epoch": 10.0,
192
+ "eval_accuracy": 0.5,
193
+ "eval_f1": 0.47144955738705735,
194
+ "eval_loss": 1.446366786956787,
195
+ "eval_precision": 0.4710898042929293,
196
+ "eval_recall": 0.5,
197
+ "eval_runtime": 0.5301,
198
+ "eval_samples_per_second": 120.735,
199
+ "eval_steps_per_second": 15.092,
200
+ "step": 650
201
+ }
202
+ ],
203
+ "logging_steps": 100,
204
+ "max_steps": 650,
205
+ "num_input_tokens_seen": 0,
206
+ "num_train_epochs": 10,
207
+ "save_steps": 100,
208
+ "stateful_callbacks": {
209
+ "EarlyStoppingCallback": {
210
+ "args": {
211
+ "early_stopping_patience": 2,
212
+ "early_stopping_threshold": 0.0
213
+ },
214
+ "attributes": {
215
+ "early_stopping_patience_counter": 0
216
+ }
217
+ },
218
+ "TrainerControl": {
219
+ "args": {
220
+ "should_epoch_stop": false,
221
+ "should_evaluate": false,
222
+ "should_log": false,
223
+ "should_save": true,
224
+ "should_training_stop": true
225
+ },
226
+ "attributes": {}
227
+ }
228
+ },
229
+ "total_flos": 1199902171084800.0,
230
+ "train_batch_size": 8,
231
+ "trial_name": null,
232
+ "trial_params": null
233
+ }
checkpoint-650/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0597ad01e6d745620abded78b27c0d3132d1fd4cfaee8c266a31971c0273581a
3
+ size 5240
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10",
24
+ "11": "LABEL_11"
25
+ },
26
+ "initializer_range": 0.02,
27
+ "intermediate_size": 4096,
28
+ "label2id": {
29
+ "LABEL_0": 0,
30
+ "LABEL_1": 1,
31
+ "LABEL_10": 10,
32
+ "LABEL_11": 11,
33
+ "LABEL_2": 2,
34
+ "LABEL_3": 3,
35
+ "LABEL_4": 4,
36
+ "LABEL_5": 5,
37
+ "LABEL_6": 6,
38
+ "LABEL_7": 7,
39
+ "LABEL_8": 8,
40
+ "LABEL_9": 9
41
+ },
42
+ "layer_norm_eps": 1e-05,
43
+ "max_position_embeddings": 514,
44
+ "model_type": "xlm-roberta",
45
+ "num_attention_heads": 16,
46
+ "num_hidden_layers": 24,
47
+ "output_past": true,
48
+ "pad_token_id": 1,
49
+ "position_embedding_type": "absolute",
50
+ "problem_type": "single_label_classification",
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.50.3",
53
+ "type_vocab_size": 1,
54
+ "use_cache": true,
55
+ "vocab_size": 250002
56
+ }
events.out.tfevents.1743875863.ebf3ff675ae4.574.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd3e5822df4a4c9faf60f0a71e28c44567b793104db065e222cc1a169b09520
3
+ size 9275
events.out.tfevents.1743876063.ebf3ff675ae4.574.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9d39a80fc3beb36a5f482aafd8bf5dccb007090ae478e67e0922850511b4eb2
3
+ size 12691
logs/events.out.tfevents.1740652331.686e763fa1a3.6372.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b679ab2ba83443dd64c4ce1ad8fc68431d53c7b4e8729e949e77a6e9887185a
3
+ size 5048
logs/events.out.tfevents.1740652422.686e763fa1a3.6372.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f61ba655f9b9482c492b3b9d0281386398db0edc9c5bdd8042bffbc72875f99
3
+ size 8912
logs/events.out.tfevents.1740653808.686e763fa1a3.6372.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db458f0b236db8c609f2ced5700e0d9f1d569bb73ce0e0146707cf3c1d3623d
3
+ size 5925
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a61d3d7d4c6e39dc60f95ebcc8ac78cb8907334c82a62302305b29cc691b20d1
3
+ size 2239659672
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ffb37461c391f096759f4a9bbbc329da0f36952f88bab061fcf84940c022e98
3
+ size 17082999
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0597ad01e6d745620abded78b27c0d3132d1fd4cfaee8c266a31971c0273581a
3
+ size 5240