Bukareszt committed on
Commit
03024be
·
verified ·
1 Parent(s): 20e66db

Initial push

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ confusion_matrix.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: PKOBP/polish-roberta-8k
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
+ - precision
10
+ - recall
11
+ - f1
12
+ model-index:
13
+ - name: mwik-classifier-extended
14
+ results: []
15
+ ---
16
+
17
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
+ should probably proofread and complete it, then remove this comment. -->
19
+
20
+ # mwik-classifier-extended
21
+
22
+ This model is a fine-tuned version of [PKOBP/polish-roberta-8k](https://huggingface.co/PKOBP/polish-roberta-8k) on an unspecified dataset (the training dataset was not recorded when this card was generated).
23
+ It achieves the following results on the evaluation set:
24
+ - Loss: 1.1518
25
+ - Accuracy: 0.7348
26
+ - Precision: 0.7235
27
+ - Recall: 0.7348
28
+ - F1: 0.7248
29
+
30
+ ## Model description
31
+
32
+ More information needed
33
+
34
+ ## Intended uses & limitations
35
+
36
+ More information needed
37
+
38
+ ## Training and evaluation data
39
+
40
+ More information needed
41
+
42
+ ## Training procedure
43
+
44
+ ### Training hyperparameters
45
+
46
+ The following hyperparameters were used during training:
47
+ - learning_rate: 0.0001
48
+ - train_batch_size: 24
49
+ - eval_batch_size: 48
50
+ - seed: 42
51
+ - gradient_accumulation_steps: 4
52
+ - total_train_batch_size: 96
53
+ - optimizer: AdamW (`OptimizerNames.ADAMW_TORCH`) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
54
+ - lr_scheduler_type: polynomial
55
+ - lr_scheduler_warmup_ratio: 0.06
56
+ - num_epochs: 7
57
+ - mixed_precision_training: Native AMP
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
62
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
63
+ | 3.2781 | 1.0 | 64 | 1.8838 | 0.5812 | 0.4719 | 0.5812 | 0.5054 |
64
+ | 1.766 | 2.0 | 128 | 1.4098 | 0.6593 | 0.6038 | 0.6593 | 0.6058 |
65
+ | 1.3812 | 3.0 | 192 | 1.1837 | 0.7083 | 0.6701 | 0.7083 | 0.6749 |
66
+ | 0.8668 | 4.0 | 256 | 1.1232 | 0.7289 | 0.7062 | 0.7289 | 0.7052 |
67
+ | 0.6762 | 5.0 | 320 | 1.0806 | 0.7412 | 0.7268 | 0.7412 | 0.7247 |
68
+ | 0.5221 | 6.0 | 384 | 1.0815 | 0.7358 | 0.7206 | 0.7358 | 0.7234 |
69
+ | 0.4312 | 7.0 | 448 | 1.0919 | 0.7466 | 0.7281 | 0.7466 | 0.7320 |
70
+
71
+
72
+ ### Framework versions
73
+
74
+ - Transformers 4.57.3
75
+ - Pytorch 2.9.0+cu126
76
+ - Datasets 4.0.0
77
+ - Tokenizers 0.22.1
added_tokens.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<mask>": 128000,
3
+ "<user_token_10>": 128010,
4
+ "<user_token_11>": 128011,
5
+ "<user_token_12>": 128012,
6
+ "<user_token_13>": 128013,
7
+ "<user_token_14>": 128014,
8
+ "<user_token_15>": 128015,
9
+ "<user_token_16>": 128016,
10
+ "<user_token_17>": 128017,
11
+ "<user_token_18>": 128018,
12
+ "<user_token_19>": 128019,
13
+ "<user_token_1>": 128001,
14
+ "<user_token_20>": 128020,
15
+ "<user_token_21>": 128021,
16
+ "<user_token_22>": 128022,
17
+ "<user_token_23>": 128023,
18
+ "<user_token_24>": 128024,
19
+ "<user_token_25>": 128025,
20
+ "<user_token_26>": 128026,
21
+ "<user_token_27>": 128027,
22
+ "<user_token_28>": 128028,
23
+ "<user_token_29>": 128029,
24
+ "<user_token_2>": 128002,
25
+ "<user_token_30>": 128030,
26
+ "<user_token_31>": 128031,
27
+ "<user_token_32>": 128032,
28
+ "<user_token_33>": 128033,
29
+ "<user_token_34>": 128034,
30
+ "<user_token_35>": 128035,
31
+ "<user_token_36>": 128036,
32
+ "<user_token_37>": 128037,
33
+ "<user_token_38>": 128038,
34
+ "<user_token_39>": 128039,
35
+ "<user_token_3>": 128003,
36
+ "<user_token_40>": 128040,
37
+ "<user_token_41>": 128041,
38
+ "<user_token_42>": 128042,
39
+ "<user_token_43>": 128043,
40
+ "<user_token_44>": 128044,
41
+ "<user_token_45>": 128045,
42
+ "<user_token_46>": 128046,
43
+ "<user_token_47>": 128047,
44
+ "<user_token_48>": 128048,
45
+ "<user_token_49>": 128049,
46
+ "<user_token_4>": 128004,
47
+ "<user_token_50>": 128050,
48
+ "<user_token_51>": 128051,
49
+ "<user_token_52>": 128052,
50
+ "<user_token_53>": 128053,
51
+ "<user_token_54>": 128054,
52
+ "<user_token_55>": 128055,
53
+ "<user_token_56>": 128056,
54
+ "<user_token_57>": 128057,
55
+ "<user_token_58>": 128058,
56
+ "<user_token_59>": 128059,
57
+ "<user_token_5>": 128005,
58
+ "<user_token_60>": 128060,
59
+ "<user_token_61>": 128061,
60
+ "<user_token_62>": 128062,
61
+ "<user_token_63>": 128063,
62
+ "<user_token_6>": 128006,
63
+ "<user_token_7>": 128007,
64
+ "<user_token_8>": 128008,
65
+ "<user_token_9>": 128009
66
+ }
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.0,
3
+ "eval_accuracy": 0.7348193697156034,
4
+ "eval_f1": 0.724775902380406,
5
+ "eval_loss": 1.1517707109451294,
6
+ "eval_precision": 0.723456161938857,
7
+ "eval_recall": 0.7348193697156034,
8
+ "eval_runtime": 6.6281,
9
+ "eval_samples_per_second": 196.285,
10
+ "eval_steps_per_second": 4.224,
11
+ "total_flos": 5.11047224331433e+16,
12
+ "train_loss": 1.151742445571082,
13
+ "train_runtime": 761.7181,
14
+ "train_samples_per_second": 55.883,
15
+ "train_steps_per_second": 0.588
16
+ }
classification_report.txt ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ================================================================================
3
+
4
+ DETAILED CLASSIFICATION REPORT (Top-1)
5
+ ================================================================================
6
+ precision recall f1-score support
7
+
8
+ BINFO 0.7083 0.9444 0.8095 18
9
+ DANE_ARCH 0.8000 0.6667 0.7273 18
10
+ DAN_DO_ROZL 0.5172 0.6250 0.5660 24
11
+ DIERZ_ST_HYD 0.9074 0.9074 0.9074 54
12
+ DUZY_PRZ 1.0000 1.0000 1.0000 6
13
+ EBOK_ZGL 0.5000 0.4000 0.4444 5
14
+ EKSP_WOD 1.0000 0.8333 0.9091 6
15
+ GRVAT_ZM 0.0000 0.0000 0.0000 1
16
+ INFO_DW 0.6944 0.7353 0.7143 34
17
+ INSP 0.7250 0.7250 0.7250 40
18
+ INTERW_AW_K 0.6885 0.7500 0.7179 56
19
+ INTERW_AW_W 0.6875 0.6111 0.6471 54
20
+ INTERW_ODTW 0.6552 0.7600 0.7037 25
21
+ INTERW_ZAP 0.8462 0.8462 0.8462 13
22
+ KONT_WYM 1.0000 0.6667 0.8000 3
23
+ KOSZT_WP 0.0000 0.0000 0.0000 1
24
+ LIK_PRZYL_WK 0.0000 0.0000 0.0000 2
25
+ MAPA 0.0000 0.0000 0.0000 0
26
+ NEGOC_DESZCZ 0.8621 0.9091 0.8850 55
27
+ NOTA_KOR 0.8333 0.8333 0.8333 6
28
+ ODPL_INSP_TV 0.8000 0.5000 0.6154 8
29
+ ODP_CZYSZ_K 0.0000 0.0000 0.0000 1
30
+ ODP_DOW_W 0.6667 0.8571 0.7500 7
31
+ ODP_LAB_WS 0.8462 0.8462 0.8462 13
32
+ ODP_LOK_WYC 0.5000 0.3333 0.4000 3
33
+ ODP_OPL_WOD 0.0000 0.0000 0.0000 2
34
+ ODP_POM_CIS 1.0000 0.5000 0.6667 2
35
+ ODP_WYM_ODL 0.5000 0.5000 0.5000 6
36
+ ODP_WYM_WOD 0.0000 0.0000 0.0000 1
37
+ ODP_ZAW_US 0.0000 0.0000 0.0000 1
38
+ ODP_ZO_ZASU 0.5000 0.6667 0.5714 3
39
+ ODSZKOD 0.5714 1.0000 0.7273 4
40
+ ODWOD_KD 0.0000 0.0000 0.0000 2
41
+ ODWOD_KS 0.5714 1.0000 0.7273 4
42
+ OKR_WL_PRZEW 0.0000 0.0000 0.0000 1
43
+ OP_PRZY_WK 0.1818 0.1333 0.1538 15
44
+ OP_SIEC_WK 0.5588 0.6333 0.5938 30
45
+ OP_UM 0.9130 0.9130 0.9130 23
46
+ PENOM 0.4000 0.5000 0.4444 4
47
+ POMYLKA 0.0000 0.0000 0.0000 2
48
+ POTW_SALDA 0.3333 0.3333 0.3333 3
49
+ POTW_WPAT 0.7632 0.8286 0.7945 35
50
+ POZYTYW 0.6471 0.5500 0.5946 20
51
+ POZ_SPR_WIND 0.6753 0.7647 0.7172 68
52
+ PROLONG 0.0000 0.0000 0.0000 2
53
+ PROMESA 1.0000 0.7500 0.8571 4
54
+ PRZE 0.8710 0.7826 0.8244 69
55
+ PRZEKSIEG 0.0000 0.0000 0.0000 4
56
+ PRZEN_WOD 0.7500 0.7500 0.7500 4
57
+ PRZYW_DOS_W 0.4000 0.6667 0.5000 3
58
+ PYT 0.3000 0.2571 0.2769 35
59
+ REKLAMACJA 0.7955 0.8434 0.8187 83
60
+ RODO 0.0000 0.0000 0.0000 1
61
+ ROW_EKSP 0.7692 0.7692 0.7692 13
62
+ ROW_WYC 0.0000 0.0000 0.0000 1
63
+ ROZDZ_INSTAL 0.0000 0.0000 0.0000 2
64
+ ROZL_PL_RATY 0.6000 1.0000 0.7500 3
65
+ SK 0.5185 0.4000 0.4516 35
66
+ SUZEBNOSC 1.0000 0.8182 0.9000 11
67
+ UDOST_WN 0.8889 0.8889 0.8889 9
68
+ UM 0.8866 0.8515 0.8687 101
69
+ UM_PARTYCY 0.9091 0.9524 0.9302 21
70
+ UZN_SCIEKI 0.7500 0.6000 0.6667 5
71
+ UZ_PRZY_WK 0.8684 0.9429 0.9041 35
72
+ UZ_SIEC_WK 0.9231 0.7500 0.8276 16
73
+ WAR_ODW_KS 1.0000 0.6667 0.8000 3
74
+ WAR_PRZY_SIE 0.0000 0.0000 0.0000 5
75
+ WAR_WK 0.5294 0.6923 0.6000 13
76
+ WAR_WKKD 0.4375 0.4667 0.4516 15
77
+ WAR_WKROW 0.0000 0.0000 0.0000 2
78
+ WOD_OGR_PRZY 0.7059 0.9231 0.8000 13
79
+ WPIN_SIEC 0.7778 0.8750 0.8235 8
80
+ WYJ_ROZL 0.0000 0.0000 0.0000 3
81
+ WYM_PRZY_WK 0.5556 0.6250 0.5882 8
82
+ ZAP_JAKOSC 1.0000 0.3333 0.5000 3
83
+ ZASW_KONC 0.6667 0.6667 0.6667 6
84
+ ZG_ODCZ 0.8723 0.9111 0.8913 45
85
+ ZM 0.6000 0.8750 0.7119 24
86
+ ZW_ANEKS 0.5000 0.6667 0.5714 3
87
+ ZW_NADP 1.0000 0.7857 0.8800 14
88
+
89
+ accuracy 0.7348 1301
90
+ macro avg 0.5466 0.5448 0.5357 1301
91
+ weighted avg 0.7235 0.7348 0.7248 1301
config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_roberta.RobertaConfig",
8
+ "AutoModel": "modeling_roberta.RobertaModel",
9
+ "AutoModelForCausalLM": "modeling_roberta.RobertaForCausalLM",
10
+ "AutoModelForMaskedLM": "modeling_roberta.RobertaForMaskedLM",
11
+ "AutoModelForMultipleChoice": "modeling_roberta.RobertaForMultipleChoice",
12
+ "AutoModelForQuestionAnswering": "modeling_roberta.RobertaForQuestionAnswering",
13
+ "AutoModelForSequenceClassification": "modeling_roberta.RobertaForSequenceClassification",
14
+ "AutoModelForTokenClassification": "modeling_roberta.RobertaForTokenClassification"
15
+ },
16
+ "bos_token_id": 0,
17
+ "classifier_dropout": null,
18
+ "dtype": "float32",
19
+ "eos_token_id": 2,
20
+ "hidden_act": "gelu",
21
+ "hidden_dropout_prob": 0.1,
22
+ "hidden_size": 1024,
23
+ "id2label": {
24
+ "0": "BINFO",
25
+ "1": "DANE_ARCH",
26
+ "2": "DAN_DO_ROZL",
27
+ "3": "DIERZ_ST_HYD",
28
+ "4": "DUZY_PRZ",
29
+ "5": "EBOK_ZGL",
30
+ "6": "EKSP_WOD",
31
+ "7": "GRVAT_ZM",
32
+ "8": "INFO_DW",
33
+ "9": "INSP",
34
+ "10": "INTERW_AW_K",
35
+ "11": "INTERW_AW_W",
36
+ "12": "INTERW_ODTW",
37
+ "13": "INTERW_ZAP",
38
+ "14": "KONT_WYM",
39
+ "15": "KOSZT_UM",
40
+ "16": "KOSZT_WP",
41
+ "17": "LIK_PRZYL_WK",
42
+ "18": "MAPA",
43
+ "19": "NEGOC_DESZCZ",
44
+ "20": "NOTA_KOR",
45
+ "21": "ODPL_INSP_TV",
46
+ "22": "ODP_CZYSZ_K",
47
+ "23": "ODP_DOW_W",
48
+ "24": "ODP_LAB_WS",
49
+ "25": "ODP_LOK_WYC",
50
+ "26": "ODP_OPL_WOD",
51
+ "27": "ODP_POM_CIS",
52
+ "28": "ODP_WYM_ODL",
53
+ "29": "ODP_WYM_WOD",
54
+ "30": "ODP_ZAW_US",
55
+ "31": "ODP_ZO_ZASU",
56
+ "32": "ODSZKOD",
57
+ "33": "ODWOD_KD",
58
+ "34": "ODWOD_KS",
59
+ "35": "OKR_WL_PRZEW",
60
+ "36": "OP_PRZY_WK",
61
+ "37": "OP_SIEC_WK",
62
+ "38": "OP_UM",
63
+ "39": "PENOM",
64
+ "40": "POMYLKA",
65
+ "41": "POTW_SALDA",
66
+ "42": "POTW_WPAT",
67
+ "43": "POZYTYW",
68
+ "44": "POZ_SPR_WIND",
69
+ "45": "PROLONG",
70
+ "46": "PROMESA",
71
+ "47": "PRZE",
72
+ "48": "PRZEKSIEG",
73
+ "49": "PRZEK_SIEC",
74
+ "50": "PRZEN_WOD",
75
+ "51": "PRZYW_DOS_W",
76
+ "52": "PYT",
77
+ "53": "REKLAMACJA",
78
+ "54": "RODO",
79
+ "55": "ROW_EKSP",
80
+ "56": "ROW_WYC",
81
+ "57": "ROZDZ_INSTAL",
82
+ "58": "ROZL_PL_RATY",
83
+ "59": "SK",
84
+ "60": "SPR_SPOS_ZAS",
85
+ "61": "SUZEBNOSC",
86
+ "62": "UDOST_WN",
87
+ "63": "UM",
88
+ "64": "UM_PARTYCY",
89
+ "65": "UZN_SCIEKI",
90
+ "66": "UZ_PRZY_WK",
91
+ "67": "UZ_SIEC_WK",
92
+ "68": "WAR_ODW_KS",
93
+ "69": "WAR_PRZY_SIE",
94
+ "70": "WAR_WK",
95
+ "71": "WAR_WKKD",
96
+ "72": "WAR_WKROW",
97
+ "73": "WOD_OGR_PRZY",
98
+ "74": "WPIN_SIEC",
99
+ "75": "WYJ_ROZL",
100
+ "76": "WYM_PRZY_WK",
101
+ "77": "ZAP_JAKOSC",
102
+ "78": "ZASW_KONC",
103
+ "79": "ZG_ODCZ",
104
+ "80": "ZM",
105
+ "81": "ZW_ANEKS",
106
+ "82": "ZW_NADP"
107
+ },
108
+ "initializer_range": 0.02,
109
+ "intermediate_size": 4096,
110
+ "label2id": {
111
+ "BINFO": 0,
112
+ "DANE_ARCH": 1,
113
+ "DAN_DO_ROZL": 2,
114
+ "DIERZ_ST_HYD": 3,
115
+ "DUZY_PRZ": 4,
116
+ "EBOK_ZGL": 5,
117
+ "EKSP_WOD": 6,
118
+ "GRVAT_ZM": 7,
119
+ "INFO_DW": 8,
120
+ "INSP": 9,
121
+ "INTERW_AW_K": 10,
122
+ "INTERW_AW_W": 11,
123
+ "INTERW_ODTW": 12,
124
+ "INTERW_ZAP": 13,
125
+ "KONT_WYM": 14,
126
+ "KOSZT_UM": 15,
127
+ "KOSZT_WP": 16,
128
+ "LIK_PRZYL_WK": 17,
129
+ "MAPA": 18,
130
+ "NEGOC_DESZCZ": 19,
131
+ "NOTA_KOR": 20,
132
+ "ODPL_INSP_TV": 21,
133
+ "ODP_CZYSZ_K": 22,
134
+ "ODP_DOW_W": 23,
135
+ "ODP_LAB_WS": 24,
136
+ "ODP_LOK_WYC": 25,
137
+ "ODP_OPL_WOD": 26,
138
+ "ODP_POM_CIS": 27,
139
+ "ODP_WYM_ODL": 28,
140
+ "ODP_WYM_WOD": 29,
141
+ "ODP_ZAW_US": 30,
142
+ "ODP_ZO_ZASU": 31,
143
+ "ODSZKOD": 32,
144
+ "ODWOD_KD": 33,
145
+ "ODWOD_KS": 34,
146
+ "OKR_WL_PRZEW": 35,
147
+ "OP_PRZY_WK": 36,
148
+ "OP_SIEC_WK": 37,
149
+ "OP_UM": 38,
150
+ "PENOM": 39,
151
+ "POMYLKA": 40,
152
+ "POTW_SALDA": 41,
153
+ "POTW_WPAT": 42,
154
+ "POZYTYW": 43,
155
+ "POZ_SPR_WIND": 44,
156
+ "PROLONG": 45,
157
+ "PROMESA": 46,
158
+ "PRZE": 47,
159
+ "PRZEKSIEG": 48,
160
+ "PRZEK_SIEC": 49,
161
+ "PRZEN_WOD": 50,
162
+ "PRZYW_DOS_W": 51,
163
+ "PYT": 52,
164
+ "REKLAMACJA": 53,
165
+ "RODO": 54,
166
+ "ROW_EKSP": 55,
167
+ "ROW_WYC": 56,
168
+ "ROZDZ_INSTAL": 57,
169
+ "ROZL_PL_RATY": 58,
170
+ "SK": 59,
171
+ "SPR_SPOS_ZAS": 60,
172
+ "SUZEBNOSC": 61,
173
+ "UDOST_WN": 62,
174
+ "UM": 63,
175
+ "UM_PARTYCY": 64,
176
+ "UZN_SCIEKI": 65,
177
+ "UZ_PRZY_WK": 66,
178
+ "UZ_SIEC_WK": 67,
179
+ "WAR_ODW_KS": 68,
180
+ "WAR_PRZY_SIE": 69,
181
+ "WAR_WK": 70,
182
+ "WAR_WKKD": 71,
183
+ "WAR_WKROW": 72,
184
+ "WOD_OGR_PRZY": 73,
185
+ "WPIN_SIEC": 74,
186
+ "WYJ_ROZL": 75,
187
+ "WYM_PRZY_WK": 76,
188
+ "ZAP_JAKOSC": 77,
189
+ "ZASW_KONC": 78,
190
+ "ZG_ODCZ": 79,
191
+ "ZM": 80,
192
+ "ZW_ANEKS": 81,
193
+ "ZW_NADP": 82
194
+ },
195
+ "layer_norm_eps": 1e-05,
196
+ "max_position_embeddings": 8194,
197
+ "model_type": "roberta",
198
+ "num_attention_heads": 16,
199
+ "num_hidden_layers": 24,
200
+ "pad_token_id": 1,
201
+ "position_embedding_type": "absolute",
202
+ "problem_type": "single_label_classification",
203
+ "transformers_version": "4.57.3",
204
+ "type_vocab_size": 1,
205
+ "use_cache": true,
206
+ "vocab_size": 128064
207
+ }
configuration_roberta.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
3
+ # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """ RoBERTa configuration"""
17
+ from collections import OrderedDict
18
+ from typing import Mapping
19
+
20
+ from transformers import PretrainedConfig
21
+ from transformers.onnx import OnnxConfig
22
+ from transformers.utils import logging
23
+
24
+
25
+ logger = logging.get_logger(__name__)
26
+
27
+
28
class RobertaConfig(PretrainedConfig):
    r"""
    Configuration container for a RoBERTa model ([`RobertaModel`] or [`TFRobertaModel`]).

    The values stored here define the model architecture. Building the class with no arguments yields a
    configuration similar to the [FacebookAI/roberta-base](https://huggingface.co/FacebookAI/roberta-base)
    architecture.

    The class derives from [`PretrainedConfig`]; refer to that class's documentation for the options that control
    model outputs. Any keyword argument not named in the signature is passed unchanged to
    `PretrainedConfig.__init__`.

    Args:
        vocab_size (`int`, *optional*, defaults to 50265):
            Size of the RoBERTa vocabulary, i.e. how many different tokens can be represented by the `input_ids`
            given to [`RobertaModel`] or [`TFRobertaModel`].
        hidden_size (`int`, *optional*, defaults to 768):
            Width of the encoder layers and of the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            How many hidden layers the Transformer encoder has.
        num_attention_heads (`int`, *optional*, defaults to 12):
            How many attention heads each attention layer of the encoder has.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Width of the "intermediate" (feed-forward) layer of the encoder.
        hidden_act (`str` or `Callable`, *optional*, defaults to `"gelu"`):
            Non-linear activation (function or string) used in the encoder and pooler. As a string, `"gelu"`,
            `"relu"`, `"silu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
            Dropout probability applied to all fully connected layers in the embeddings, encoder and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
            Dropout ratio applied to the attention probabilities.
        max_position_embeddings (`int`, *optional*, defaults to 512):
            Longest sequence length the model might ever be used with. Usually chosen generously
            (e.g., 512 or 1024 or 2048).
        type_vocab_size (`int`, *optional*, defaults to 2):
            Vocabulary size of the `token_type_ids` given to [`RobertaModel`] or [`TFRobertaModel`].
        initializer_range (`float`, *optional*, defaults to 0.02):
            Standard deviation of the truncated_normal_initializer used for all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            Epsilon used by the layer normalization layers.
        pad_token_id (`int`, *optional*, defaults to 1):
            Forwarded to `PretrainedConfig.__init__` as `pad_token_id`.
        bos_token_id (`int`, *optional*, defaults to 0):
            Forwarded to `PretrainedConfig.__init__` as `bos_token_id`.
        eos_token_id (`int`, *optional*, defaults to 2):
            Forwarded to `PretrainedConfig.__init__` as `eos_token_id`.
        position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
            Kind of position embedding: one of `"absolute"`, `"relative_key"`, `"relative_key_query"`. Use
            `"absolute"` for positional embeddings. `"relative_key"` is described in
            [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155);
            `"relative_key_query"` is *Method 4* in [Improve Transformer Models with Better Relative Position
            Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        classifier_dropout (`float`, *optional*):
            Dropout ratio for the classification head.
        is_decoder (`bool`, *optional*, defaults to `False`):
            Not a named parameter here; when given it reaches [`PretrainedConfig`] through `**kwargs`. Whether the
            model is used as a decoder; if `False`, the model is used as an encoder.

    Examples:

    ```python
    >>> from transformers import RobertaConfig, RobertaModel

    >>> # Build a RoBERTa configuration with default values
    >>> configuration = RobertaConfig()

    >>> # Build a model (random weights) from that configuration
    >>> model = RobertaModel(configuration)

    >>> # Read the configuration back from the model
    >>> configuration = model.config
    ```"""

    model_type = "roberta"

    def __init__(
        self,
        vocab_size=50265,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=1,
        bos_token_id=0,
        eos_token_id=2,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        **kwargs,
    ):
        # The special-token ids and any extra keyword arguments are handled by the base class.
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )

        # Encoder geometry.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size

        # Regularisation.
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob

        # Embedding tables, initialisation and normalisation.
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.position_embedding_type = position_embedding_type

        # Caching flag and classification-head dropout.
        self.use_cache = use_cache
        self.classifier_dropout = classifier_dropout
137
+
138
+
139
class RobertaOnnxConfig(OnnxConfig):
    """ONNX export description for RoBERTa: declares the model inputs and their dynamic axes."""

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        """
        Map each input name to its dynamic axes (axis index -> axis name).

        For the `"multiple-choice"` task the axes are batch / choice / sequence; for every other task they are
        batch / sequence. `input_ids` and `attention_mask` share the same axis mapping object, in that order.
        """
        axes = (
            {0: "batch", 1: "choice", 2: "sequence"}
            if self.task == "multiple-choice"
            else {0: "batch", 1: "sequence"}
        )

        spec = OrderedDict()
        for input_name in ("input_ids", "attention_mask"):
            spec[input_name] = axes
        return spec
confusion_matrix.png ADDED

Git LFS Details

  • SHA256: ce812af385afc99b5bacc820bdef9e45d8795a2eef997f15c86a25e1e7fc768a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.3 MB
label_info.json ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label2id": {
3
+ "BINFO": 0,
4
+ "DANE_ARCH": 1,
5
+ "DAN_DO_ROZL": 2,
6
+ "DIERZ_ST_HYD": 3,
7
+ "DUZY_PRZ": 4,
8
+ "EBOK_ZGL": 5,
9
+ "EKSP_WOD": 6,
10
+ "GRVAT_ZM": 7,
11
+ "INFO_DW": 8,
12
+ "INSP": 9,
13
+ "INTERW_AW_K": 10,
14
+ "INTERW_AW_W": 11,
15
+ "INTERW_ODTW": 12,
16
+ "INTERW_ZAP": 13,
17
+ "KONT_WYM": 14,
18
+ "KOSZT_UM": 15,
19
+ "KOSZT_WP": 16,
20
+ "LIK_PRZYL_WK": 17,
21
+ "MAPA": 18,
22
+ "NEGOC_DESZCZ": 19,
23
+ "NOTA_KOR": 20,
24
+ "ODPL_INSP_TV": 21,
25
+ "ODP_CZYSZ_K": 22,
26
+ "ODP_DOW_W": 23,
27
+ "ODP_LAB_WS": 24,
28
+ "ODP_LOK_WYC": 25,
29
+ "ODP_OPL_WOD": 26,
30
+ "ODP_POM_CIS": 27,
31
+ "ODP_WYM_ODL": 28,
32
+ "ODP_WYM_WOD": 29,
33
+ "ODP_ZAW_US": 30,
34
+ "ODP_ZO_ZASU": 31,
35
+ "ODSZKOD": 32,
36
+ "ODWOD_KD": 33,
37
+ "ODWOD_KS": 34,
38
+ "OKR_WL_PRZEW": 35,
39
+ "OP_PRZY_WK": 36,
40
+ "OP_SIEC_WK": 37,
41
+ "OP_UM": 38,
42
+ "PENOM": 39,
43
+ "POMYLKA": 40,
44
+ "POTW_SALDA": 41,
45
+ "POTW_WPAT": 42,
46
+ "POZYTYW": 43,
47
+ "POZ_SPR_WIND": 44,
48
+ "PROLONG": 45,
49
+ "PROMESA": 46,
50
+ "PRZE": 47,
51
+ "PRZEKSIEG": 48,
52
+ "PRZEK_SIEC": 49,
53
+ "PRZEN_WOD": 50,
54
+ "PRZYW_DOS_W": 51,
55
+ "PYT": 52,
56
+ "REKLAMACJA": 53,
57
+ "RODO": 54,
58
+ "ROW_EKSP": 55,
59
+ "ROW_WYC": 56,
60
+ "ROZDZ_INSTAL": 57,
61
+ "ROZL_PL_RATY": 58,
62
+ "SK": 59,
63
+ "SPR_SPOS_ZAS": 60,
64
+ "SUZEBNOSC": 61,
65
+ "UDOST_WN": 62,
66
+ "UM": 63,
67
+ "UM_PARTYCY": 64,
68
+ "UZN_SCIEKI": 65,
69
+ "UZ_PRZY_WK": 66,
70
+ "UZ_SIEC_WK": 67,
71
+ "WAR_ODW_KS": 68,
72
+ "WAR_PRZY_SIE": 69,
73
+ "WAR_WK": 70,
74
+ "WAR_WKKD": 71,
75
+ "WAR_WKROW": 72,
76
+ "WOD_OGR_PRZY": 73,
77
+ "WPIN_SIEC": 74,
78
+ "WYJ_ROZL": 75,
79
+ "WYM_PRZY_WK": 76,
80
+ "ZAP_JAKOSC": 77,
81
+ "ZASW_KONC": 78,
82
+ "ZG_ODCZ": 79,
83
+ "ZM": 80,
84
+ "ZW_ANEKS": 81,
85
+ "ZW_NADP": 82
86
+ },
87
+ "id2label": {
88
+ "0": "BINFO",
89
+ "1": "DANE_ARCH",
90
+ "2": "DAN_DO_ROZL",
91
+ "3": "DIERZ_ST_HYD",
92
+ "4": "DUZY_PRZ",
93
+ "5": "EBOK_ZGL",
94
+ "6": "EKSP_WOD",
95
+ "7": "GRVAT_ZM",
96
+ "8": "INFO_DW",
97
+ "9": "INSP",
98
+ "10": "INTERW_AW_K",
99
+ "11": "INTERW_AW_W",
100
+ "12": "INTERW_ODTW",
101
+ "13": "INTERW_ZAP",
102
+ "14": "KONT_WYM",
103
+ "15": "KOSZT_UM",
104
+ "16": "KOSZT_WP",
105
+ "17": "LIK_PRZYL_WK",
106
+ "18": "MAPA",
107
+ "19": "NEGOC_DESZCZ",
108
+ "20": "NOTA_KOR",
109
+ "21": "ODPL_INSP_TV",
110
+ "22": "ODP_CZYSZ_K",
111
+ "23": "ODP_DOW_W",
112
+ "24": "ODP_LAB_WS",
113
+ "25": "ODP_LOK_WYC",
114
+ "26": "ODP_OPL_WOD",
115
+ "27": "ODP_POM_CIS",
116
+ "28": "ODP_WYM_ODL",
117
+ "29": "ODP_WYM_WOD",
118
+ "30": "ODP_ZAW_US",
119
+ "31": "ODP_ZO_ZASU",
120
+ "32": "ODSZKOD",
121
+ "33": "ODWOD_KD",
122
+ "34": "ODWOD_KS",
123
+ "35": "OKR_WL_PRZEW",
124
+ "36": "OP_PRZY_WK",
125
+ "37": "OP_SIEC_WK",
126
+ "38": "OP_UM",
127
+ "39": "PENOM",
128
+ "40": "POMYLKA",
129
+ "41": "POTW_SALDA",
130
+ "42": "POTW_WPAT",
131
+ "43": "POZYTYW",
132
+ "44": "POZ_SPR_WIND",
133
+ "45": "PROLONG",
134
+ "46": "PROMESA",
135
+ "47": "PRZE",
136
+ "48": "PRZEKSIEG",
137
+ "49": "PRZEK_SIEC",
138
+ "50": "PRZEN_WOD",
139
+ "51": "PRZYW_DOS_W",
140
+ "52": "PYT",
141
+ "53": "REKLAMACJA",
142
+ "54": "RODO",
143
+ "55": "ROW_EKSP",
144
+ "56": "ROW_WYC",
145
+ "57": "ROZDZ_INSTAL",
146
+ "58": "ROZL_PL_RATY",
147
+ "59": "SK",
148
+ "60": "SPR_SPOS_ZAS",
149
+ "61": "SUZEBNOSC",
150
+ "62": "UDOST_WN",
151
+ "63": "UM",
152
+ "64": "UM_PARTYCY",
153
+ "65": "UZN_SCIEKI",
154
+ "66": "UZ_PRZY_WK",
155
+ "67": "UZ_SIEC_WK",
156
+ "68": "WAR_ODW_KS",
157
+ "69": "WAR_PRZY_SIE",
158
+ "70": "WAR_WK",
159
+ "71": "WAR_WKKD",
160
+ "72": "WAR_WKROW",
161
+ "73": "WOD_OGR_PRZY",
162
+ "74": "WPIN_SIEC",
163
+ "75": "WYJ_ROZL",
164
+ "76": "WYM_PRZY_WK",
165
+ "77": "ZAP_JAKOSC",
166
+ "78": "ZASW_KONC",
167
+ "79": "ZG_ODCZ",
168
+ "80": "ZM",
169
+ "81": "ZW_ANEKS",
170
+ "82": "ZW_NADP"
171
+ },
172
+ "num_labels": 83
173
+ }
logs/events.out.tfevents.1765909715.a68abee7a327.906.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:326d0fdc8691d325aa58bc4d798fd6b39c6d253b4384aa5e18ed5f8b1ed5d6f1
3
+ size 14605
logs/events.out.tfevents.1765910484.a68abee7a327.906.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db97219afd49a5a42b0590f0a2c6e7f5e7a131bdb4afeb3863640396b6079142
3
+ size 560
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bbc10888abc0cf1e6e0d33794129dd6d0d0bf83e6df293a9bfb68cc08965766
3
+ size 1771949724
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
test_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.0,
3
+ "eval_accuracy": 0.7348193697156034,
4
+ "eval_f1": 0.724775902380406,
5
+ "eval_loss": 1.1517707109451294,
6
+ "eval_precision": 0.723456161938857,
7
+ "eval_recall": 0.7348193697156034,
8
+ "eval_runtime": 6.6281,
9
+ "eval_samples_per_second": 196.285,
10
+ "eval_steps_per_second": 4.224
11
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "128000": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "128001": {
45
+ "content": "<user_token_1>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "128002": {
53
+ "content": "<user_token_2>",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
+ "128003": {
61
+ "content": "<user_token_3>",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": false
67
+ },
68
+ "128004": {
69
+ "content": "<user_token_4>",
70
+ "lstrip": false,
71
+ "normalized": true,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": false
75
+ },
76
+ "128005": {
77
+ "content": "<user_token_5>",
78
+ "lstrip": false,
79
+ "normalized": true,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": false
83
+ },
84
+ "128006": {
85
+ "content": "<user_token_6>",
86
+ "lstrip": false,
87
+ "normalized": true,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": false
91
+ },
92
+ "128007": {
93
+ "content": "<user_token_7>",
94
+ "lstrip": false,
95
+ "normalized": true,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": false
99
+ },
100
+ "128008": {
101
+ "content": "<user_token_8>",
102
+ "lstrip": false,
103
+ "normalized": true,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": false
107
+ },
108
+ "128009": {
109
+ "content": "<user_token_9>",
110
+ "lstrip": false,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": false
115
+ },
116
+ "128010": {
117
+ "content": "<user_token_10>",
118
+ "lstrip": false,
119
+ "normalized": true,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "128011": {
125
+ "content": "<user_token_11>",
126
+ "lstrip": false,
127
+ "normalized": true,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "128012": {
133
+ "content": "<user_token_12>",
134
+ "lstrip": false,
135
+ "normalized": true,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "128013": {
141
+ "content": "<user_token_13>",
142
+ "lstrip": false,
143
+ "normalized": true,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "128014": {
149
+ "content": "<user_token_14>",
150
+ "lstrip": false,
151
+ "normalized": true,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "128015": {
157
+ "content": "<user_token_15>",
158
+ "lstrip": false,
159
+ "normalized": true,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "128016": {
165
+ "content": "<user_token_16>",
166
+ "lstrip": false,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "128017": {
173
+ "content": "<user_token_17>",
174
+ "lstrip": false,
175
+ "normalized": true,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "128018": {
181
+ "content": "<user_token_18>",
182
+ "lstrip": false,
183
+ "normalized": true,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "128019": {
189
+ "content": "<user_token_19>",
190
+ "lstrip": false,
191
+ "normalized": true,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "128020": {
197
+ "content": "<user_token_20>",
198
+ "lstrip": false,
199
+ "normalized": true,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ },
204
+ "128021": {
205
+ "content": "<user_token_21>",
206
+ "lstrip": false,
207
+ "normalized": true,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": false
211
+ },
212
+ "128022": {
213
+ "content": "<user_token_22>",
214
+ "lstrip": false,
215
+ "normalized": true,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": false
219
+ },
220
+ "128023": {
221
+ "content": "<user_token_23>",
222
+ "lstrip": false,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": false
227
+ },
228
+ "128024": {
229
+ "content": "<user_token_24>",
230
+ "lstrip": false,
231
+ "normalized": true,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": false
235
+ },
236
+ "128025": {
237
+ "content": "<user_token_25>",
238
+ "lstrip": false,
239
+ "normalized": true,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": false
243
+ },
244
+ "128026": {
245
+ "content": "<user_token_26>",
246
+ "lstrip": false,
247
+ "normalized": true,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": false
251
+ },
252
+ "128027": {
253
+ "content": "<user_token_27>",
254
+ "lstrip": false,
255
+ "normalized": true,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": false
259
+ },
260
+ "128028": {
261
+ "content": "<user_token_28>",
262
+ "lstrip": false,
263
+ "normalized": true,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": false
267
+ },
268
+ "128029": {
269
+ "content": "<user_token_29>",
270
+ "lstrip": false,
271
+ "normalized": true,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": false
275
+ },
276
+ "128030": {
277
+ "content": "<user_token_30>",
278
+ "lstrip": false,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": false
283
+ },
284
+ "128031": {
285
+ "content": "<user_token_31>",
286
+ "lstrip": false,
287
+ "normalized": true,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": false
291
+ },
292
+ "128032": {
293
+ "content": "<user_token_32>",
294
+ "lstrip": false,
295
+ "normalized": true,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": false
299
+ },
300
+ "128033": {
301
+ "content": "<user_token_33>",
302
+ "lstrip": false,
303
+ "normalized": true,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": false
307
+ },
308
+ "128034": {
309
+ "content": "<user_token_34>",
310
+ "lstrip": false,
311
+ "normalized": true,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": false
315
+ },
316
+ "128035": {
317
+ "content": "<user_token_35>",
318
+ "lstrip": false,
319
+ "normalized": true,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": false
323
+ },
324
+ "128036": {
325
+ "content": "<user_token_36>",
326
+ "lstrip": false,
327
+ "normalized": true,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": false
331
+ },
332
+ "128037": {
333
+ "content": "<user_token_37>",
334
+ "lstrip": false,
335
+ "normalized": true,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": false
339
+ },
340
+ "128038": {
341
+ "content": "<user_token_38>",
342
+ "lstrip": false,
343
+ "normalized": true,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": false
347
+ },
348
+ "128039": {
349
+ "content": "<user_token_39>",
350
+ "lstrip": false,
351
+ "normalized": true,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": false
355
+ },
356
+ "128040": {
357
+ "content": "<user_token_40>",
358
+ "lstrip": false,
359
+ "normalized": true,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": false
363
+ },
364
+ "128041": {
365
+ "content": "<user_token_41>",
366
+ "lstrip": false,
367
+ "normalized": true,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": false
371
+ },
372
+ "128042": {
373
+ "content": "<user_token_42>",
374
+ "lstrip": false,
375
+ "normalized": true,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": false
379
+ },
380
+ "128043": {
381
+ "content": "<user_token_43>",
382
+ "lstrip": false,
383
+ "normalized": true,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": false
387
+ },
388
+ "128044": {
389
+ "content": "<user_token_44>",
390
+ "lstrip": false,
391
+ "normalized": true,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": false
395
+ },
396
+ "128045": {
397
+ "content": "<user_token_45>",
398
+ "lstrip": false,
399
+ "normalized": true,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": false
403
+ },
404
+ "128046": {
405
+ "content": "<user_token_46>",
406
+ "lstrip": false,
407
+ "normalized": true,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": false
411
+ },
412
+ "128047": {
413
+ "content": "<user_token_47>",
414
+ "lstrip": false,
415
+ "normalized": true,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": false
419
+ },
420
+ "128048": {
421
+ "content": "<user_token_48>",
422
+ "lstrip": false,
423
+ "normalized": true,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": false
427
+ },
428
+ "128049": {
429
+ "content": "<user_token_49>",
430
+ "lstrip": false,
431
+ "normalized": true,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": false
435
+ },
436
+ "128050": {
437
+ "content": "<user_token_50>",
438
+ "lstrip": false,
439
+ "normalized": true,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": false
443
+ },
444
+ "128051": {
445
+ "content": "<user_token_51>",
446
+ "lstrip": false,
447
+ "normalized": true,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": false
451
+ },
452
+ "128052": {
453
+ "content": "<user_token_52>",
454
+ "lstrip": false,
455
+ "normalized": true,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": false
459
+ },
460
+ "128053": {
461
+ "content": "<user_token_53>",
462
+ "lstrip": false,
463
+ "normalized": true,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": false
467
+ },
468
+ "128054": {
469
+ "content": "<user_token_54>",
470
+ "lstrip": false,
471
+ "normalized": true,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": false
475
+ },
476
+ "128055": {
477
+ "content": "<user_token_55>",
478
+ "lstrip": false,
479
+ "normalized": true,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": false
483
+ },
484
+ "128056": {
485
+ "content": "<user_token_56>",
486
+ "lstrip": false,
487
+ "normalized": true,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": false
491
+ },
492
+ "128057": {
493
+ "content": "<user_token_57>",
494
+ "lstrip": false,
495
+ "normalized": true,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": false
499
+ },
500
+ "128058": {
501
+ "content": "<user_token_58>",
502
+ "lstrip": false,
503
+ "normalized": true,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": false
507
+ },
508
+ "128059": {
509
+ "content": "<user_token_59>",
510
+ "lstrip": false,
511
+ "normalized": true,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": false
515
+ },
516
+ "128060": {
517
+ "content": "<user_token_60>",
518
+ "lstrip": false,
519
+ "normalized": true,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": false
523
+ },
524
+ "128061": {
525
+ "content": "<user_token_61>",
526
+ "lstrip": false,
527
+ "normalized": true,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": false
531
+ },
532
+ "128062": {
533
+ "content": "<user_token_62>",
534
+ "lstrip": false,
535
+ "normalized": true,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": false
539
+ },
540
+ "128063": {
541
+ "content": "<user_token_63>",
542
+ "lstrip": false,
543
+ "normalized": true,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": false
547
+ }
548
+ },
549
+ "bos_token": "<s>",
550
+ "clean_up_tokenization_spaces": false,
551
+ "cls_token": "<s>",
552
+ "eos_token": "</s>",
553
+ "errors": "replace",
554
+ "extra_special_tokens": {},
555
+ "mask_token": "<mask>",
556
+ "model_max_length": 1000000000000000019884624838656,
557
+ "pad_token": "<pad>",
558
+ "sep_token": "</s>",
559
+ "tokenizer_class": "RobertaTokenizer",
560
+ "trim_offsets": true,
561
+ "unk_token": "<unk>"
562
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.0,
3
+ "total_flos": 5.11047224331433e+16,
4
+ "train_loss": 1.151742445571082,
5
+ "train_runtime": 761.7181,
6
+ "train_samples_per_second": 55.883,
7
+ "train_steps_per_second": 0.588
8
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69aa8254a3525e98c4489875babf1dacaff8116ddc16e94e4f1d376257e65b4e
3
+ size 5905
unigram.json ADDED
The diff for this file is too large to render. See raw diff