hamidrezaahmadian committed on
Commit
dda156b
·
1 Parent(s): bd26629

copy paste

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ tags:
4
+ - generated_from_trainer
5
+ model-index:
6
+ - name: single_label_unbiased_relevant_profession
7
+ results: []
8
+ ---
9
+
10
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
+ should probably proofread and complete it, then remove this comment. -->
12
+
13
+ # single_label_unbiased_relevant_profession
14
+
15
+ This model is a fine-tuned version of [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) on an unspecified dataset (the Trainer was not given a dataset name).
16
+ It achieves the following results on the evaluation set:
17
+ - Loss: 1.5581
18
+ - Acc At K: 0.8934
19
+ - Acc: 0.5742
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - learning_rate: 2e-05
39
+ - train_batch_size: 64
40
+ - eval_batch_size: 64
41
+ - seed: 42
42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
+ - lr_scheduler_type: linear
44
+ - num_epochs: 8
45
+
46
+ ### Training results
47
+
48
+ | Training Loss | Epoch | Step | Validation Loss | Acc At K | Acc |
49
+ |:-------------:|:-----:|:------:|:---------------:|:--------:|:------:|
50
+ | 4.1013 | 0.5 | 22700 | 2.6410 | 0.7356 | 0.4504 |
51
+ | 2.3359 | 1.0 | 45400 | 2.1112 | 0.8115 | 0.4979 |
52
+ | 1.9045 | 1.5 | 68100 | 1.9027 | 0.8428 | 0.5240 |
53
+ | 1.7084 | 2.0 | 90800 | 1.7826 | 0.8607 | 0.5340 |
54
+ | 1.5155 | 2.5 | 113500 | 1.7117 | 0.8711 | 0.5444 |
55
+ | 1.4211 | 3.0 | 136200 | 1.6643 | 0.8782 | 0.5493 |
56
+ | 1.2865 | 3.5 | 158900 | 1.6342 | 0.8812 | 0.5568 |
57
+ | 1.2357 | 4.0 | 181600 | 1.6077 | 0.8852 | 0.5588 |
58
+ | 1.1303 | 4.5 | 204300 | 1.6023 | 0.8873 | 0.5632 |
59
+ | 1.0987 | 5.0 | 227000 | 1.5784 | 0.8896 | 0.5652 |
60
+ | 1.0186 | 5.5 | 249700 | 1.5782 | 0.8904 | 0.5673 |
61
+ | 0.9982 | 6.0 | 272400 | 1.5712 | 0.8914 | 0.5707 |
62
+ | 0.9404 | 6.5 | 295100 | 1.5685 | 0.8920 | 0.5710 |
63
+ | 0.9263 | 7.0 | 317800 | 1.5615 | 0.8925 | 0.5725 |
64
+ | 0.8839 | 7.5 | 340500 | 1.5603 | 0.8929 | 0.5741 |
65
+ | 0.8780 | 8.0 | 363200 | 1.5581 | 0.8934 | 0.5742 |
66
+
67
+
68
+ ### Framework versions
69
+
70
+ - Transformers 4.26.1
71
+ - PyTorch 2.0.0+cu117
72
+ - Datasets 2.11.0
73
+ - Tokenizers 0.13.3
config.json ADDED
The diff for this file is too large to render. See raw diff
 
label_to_ids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90daaab57f699b8a87b5a5c0d483b1f0bdf07b3b4ae53c8be5efaac8df1b0f12
3
+ size 215823
last-checkpoint/config.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d480c9464f4daba28debbda04a3eb18547e0ed0ab908b4c2cbba19b5a82b712c
3
+ size 2265359813
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d447ef6303c43f9940675e81d43f3e20e35f97097b908270af7b4adbe1ddd163
3
+ size 1132694197
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c90bd02baf6214ae01f1b407108ebdacb7280385a5871c8a223bb4689636d7cb
3
+ size 14575
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc266f42cee6a5338936cd9429837a516919132ca70cf301a4ba6a3766b45a84
3
+ size 627
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
last-checkpoint/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f620ed598f02c6e9ad8bd7c093e0bf1f883f0a113b315e3299ae7d1b85581a
3
+ size 17082912
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "__type": "AddedToken",
7
+ "content": "<mask>",
8
+ "lstrip": true,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "model_max_length": 512,
14
+ "name_or_path": "xlm-roberta-base",
15
+ "pad_token": "<pad>",
16
+ "sep_token": "</s>",
17
+ "special_tokens_map_file": null,
18
+ "tokenizer_class": "XLMRobertaTokenizer",
19
+ "unk_token": "<unk>"
20
+ }
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 7.929340763419308,
5
+ "global_step": 360000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.5,
12
+ "learning_rate": 1.8750027532433207e-05,
13
+ "loss": 4.1013,
14
+ "step": 22700
15
+ },
16
+ {
17
+ "epoch": 0.5,
18
+ "eval_acc": 0.45036102325248506,
19
+ "eval_acc_at_k": 0.7356285768554228,
20
+ "eval_loss": 2.6409595012664795,
21
+ "eval_runtime": 780.8248,
22
+ "eval_samples_per_second": 465.243,
23
+ "eval_steps_per_second": 7.271,
24
+ "step": 22700
25
+ },
26
+ {
27
+ "epoch": 1.0,
28
+ "learning_rate": 1.7500055064866416e-05,
29
+ "loss": 2.3359,
30
+ "step": 45400
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "eval_acc": 0.4979065331032034,
35
+ "eval_acc_at_k": 0.8115219132718369,
36
+ "eval_loss": 2.111237049102783,
37
+ "eval_runtime": 763.2781,
38
+ "eval_samples_per_second": 475.938,
39
+ "eval_steps_per_second": 7.438,
40
+ "step": 45400
41
+ },
42
+ {
43
+ "epoch": 1.5,
44
+ "learning_rate": 1.625008259729962e-05,
45
+ "loss": 1.9045,
46
+ "step": 68100
47
+ },
48
+ {
49
+ "epoch": 1.5,
50
+ "eval_acc": 0.5239503073446141,
51
+ "eval_acc_at_k": 0.8428344523264872,
52
+ "eval_loss": 1.902678370475769,
53
+ "eval_runtime": 753.8194,
54
+ "eval_samples_per_second": 481.91,
55
+ "eval_steps_per_second": 7.531,
56
+ "step": 68100
57
+ },
58
+ {
59
+ "epoch": 2.0,
60
+ "learning_rate": 1.5000110129732826e-05,
61
+ "loss": 1.7084,
62
+ "step": 90800
63
+ },
64
+ {
65
+ "epoch": 2.0,
66
+ "eval_acc": 0.5340171166037663,
67
+ "eval_acc_at_k": 0.8607108152821707,
68
+ "eval_loss": 1.7825987339019775,
69
+ "eval_runtime": 756.3653,
70
+ "eval_samples_per_second": 480.288,
71
+ "eval_steps_per_second": 7.506,
72
+ "step": 90800
73
+ },
74
+ {
75
+ "epoch": 2.5,
76
+ "learning_rate": 1.3750137662166031e-05,
77
+ "loss": 1.5155,
78
+ "step": 113500
79
+ },
80
+ {
81
+ "epoch": 2.5,
82
+ "eval_acc": 0.5443592009315308,
83
+ "eval_acc_at_k": 0.8710528996099353,
84
+ "eval_loss": 1.7116812467575073,
85
+ "eval_runtime": 757.8065,
86
+ "eval_samples_per_second": 479.374,
87
+ "eval_steps_per_second": 7.491,
88
+ "step": 113500
89
+ },
90
+ {
91
+ "epoch": 3.0,
92
+ "learning_rate": 1.2500165194599238e-05,
93
+ "loss": 1.4211,
94
+ "step": 136200
95
+ },
96
+ {
97
+ "epoch": 3.0,
98
+ "eval_acc": 0.5492563444021439,
99
+ "eval_acc_at_k": 0.878157749130819,
100
+ "eval_loss": 1.6643491983413696,
101
+ "eval_runtime": 759.0909,
102
+ "eval_samples_per_second": 478.563,
103
+ "eval_steps_per_second": 7.479,
104
+ "step": 136200
105
+ },
106
+ {
107
+ "epoch": 3.5,
108
+ "learning_rate": 1.1250192727032445e-05,
109
+ "loss": 1.2865,
110
+ "step": 158900
111
+ },
112
+ {
113
+ "epoch": 3.5,
114
+ "eval_acc": 0.556774106525946,
115
+ "eval_acc_at_k": 0.8812353243979046,
116
+ "eval_loss": 1.6341607570648193,
117
+ "eval_runtime": 756.5457,
118
+ "eval_samples_per_second": 480.173,
119
+ "eval_steps_per_second": 7.504,
120
+ "step": 158900
121
+ },
122
+ {
123
+ "epoch": 4.0,
124
+ "learning_rate": 1.0000220259465652e-05,
125
+ "loss": 1.2357,
126
+ "step": 181600
127
+ },
128
+ {
129
+ "epoch": 4.0,
130
+ "eval_acc": 0.5588056365323049,
131
+ "eval_acc_at_k": 0.8852185546407247,
132
+ "eval_loss": 1.6077438592910767,
133
+ "eval_runtime": 755.3821,
134
+ "eval_samples_per_second": 480.913,
135
+ "eval_steps_per_second": 7.515,
136
+ "step": 181600
137
+ },
138
+ {
139
+ "epoch": 4.5,
140
+ "learning_rate": 8.750247791898858e-06,
141
+ "loss": 1.1303,
142
+ "step": 204300
143
+ },
144
+ {
145
+ "epoch": 4.5,
146
+ "eval_acc": 0.5632485761397076,
147
+ "eval_acc_at_k": 0.8873409254197256,
148
+ "eval_loss": 1.6022595167160034,
149
+ "eval_runtime": 756.1367,
150
+ "eval_samples_per_second": 480.433,
151
+ "eval_steps_per_second": 7.508,
152
+ "step": 204300
153
+ },
154
+ {
155
+ "epoch": 5.0,
156
+ "learning_rate": 7.500275324332064e-06,
157
+ "loss": 1.0987,
158
+ "step": 227000
159
+ },
160
+ {
161
+ "epoch": 5.0,
162
+ "eval_acc": 0.5652388148857747,
163
+ "eval_acc_at_k": 0.8896284612398939,
164
+ "eval_loss": 1.5783677101135254,
165
+ "eval_runtime": 753.7746,
166
+ "eval_samples_per_second": 481.938,
167
+ "eval_steps_per_second": 7.531,
168
+ "step": 227000
169
+ },
170
+ {
171
+ "epoch": 5.5,
172
+ "learning_rate": 6.25030285676527e-06,
173
+ "loss": 1.0186,
174
+ "step": 249700
175
+ },
176
+ {
177
+ "epoch": 5.5,
178
+ "eval_acc": 0.5673474219113449,
179
+ "eval_acc_at_k": 0.8903992314320084,
180
+ "eval_loss": 1.5781617164611816,
181
+ "eval_runtime": 755.2261,
182
+ "eval_samples_per_second": 481.012,
183
+ "eval_steps_per_second": 7.517,
184
+ "step": 249700
185
+ },
186
+ {
187
+ "epoch": 6.0,
188
+ "learning_rate": 5.000330389198477e-06,
189
+ "loss": 0.9982,
190
+ "step": 272400
191
+ },
192
+ {
193
+ "epoch": 6.0,
194
+ "eval_acc": 0.5707277997539041,
195
+ "eval_acc_at_k": 0.8913929744296989,
196
+ "eval_loss": 1.5712472200393677,
197
+ "eval_runtime": 755.7584,
198
+ "eval_samples_per_second": 480.673,
199
+ "eval_steps_per_second": 7.512,
200
+ "step": 272400
201
+ },
202
+ {
203
+ "epoch": 6.5,
204
+ "learning_rate": 3.7503579216316827e-06,
205
+ "loss": 0.9404,
206
+ "step": 295100
207
+ },
208
+ {
209
+ "epoch": 6.5,
210
+ "eval_acc": 0.5709975693211442,
211
+ "eval_acc_at_k": 0.8920068378327043,
212
+ "eval_loss": 1.5684970617294312,
213
+ "eval_runtime": 753.639,
214
+ "eval_samples_per_second": 482.025,
215
+ "eval_steps_per_second": 7.533,
216
+ "step": 295100
217
+ },
218
+ {
219
+ "epoch": 7.0,
220
+ "learning_rate": 2.500385454064889e-06,
221
+ "loss": 0.9263,
222
+ "step": 317800
223
+ },
224
+ {
225
+ "epoch": 7.0,
226
+ "eval_acc": 0.57247029093822,
227
+ "eval_acc_at_k": 0.8925023329562065,
228
+ "eval_loss": 1.5614935159683228,
229
+ "eval_runtime": 759.0312,
230
+ "eval_samples_per_second": 478.601,
231
+ "eval_steps_per_second": 7.479,
232
+ "step": 317800
233
+ },
234
+ {
235
+ "epoch": 7.5,
236
+ "learning_rate": 1.250412986498095e-06,
237
+ "loss": 0.8839,
238
+ "step": 340500
239
+ },
240
+ {
241
+ "epoch": 7.5,
242
+ "eval_acc": 0.5740751445882298,
243
+ "eval_acc_at_k": 0.8929262565618694,
244
+ "eval_loss": 1.5602914094924927,
245
+ "eval_runtime": 762.0808,
246
+ "eval_samples_per_second": 476.686,
247
+ "eval_steps_per_second": 7.449,
248
+ "step": 340500
249
+ }
250
+ ],
251
+ "max_steps": 363208,
252
+ "num_train_epochs": 8,
253
+ "total_flos": 8.029681855685318e+17,
254
+ "trial_name": null,
255
+ "trial_params": null
256
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8887aadbdbf0e95ece37ccd6d4b294d27d54e53293287a89603855f7b5894389
3
+ size 3899
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cefa0240b97317d3734b730ab9263c41bb62633a28ec6d94ea3015d5d6215b50
3
+ size 1132694197
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f620ed598f02c6e9ad8bd7c093e0bf1f883f0a113b315e3299ae7d1b85581a
3
+ size 17082912
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "__type": "AddedToken",
7
+ "content": "<mask>",
8
+ "lstrip": true,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "model_max_length": 512,
14
+ "name_or_path": "xlm-roberta-base",
15
+ "pad_token": "<pad>",
16
+ "sep_token": "</s>",
17
+ "special_tokens_map_file": null,
18
+ "tokenizer_class": "XLMRobertaTokenizer",
19
+ "unk_token": "<unk>"
20
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8887aadbdbf0e95ece37ccd6d4b294d27d54e53293287a89603855f7b5894389
3
+ size 3899