CLTL committed on
Commit
6d41847
·
verified ·
1 Parent(s): f38dff0

Added the model files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[DOMAIN_ADM]": 52000,
3
+ "[DOMAIN_ATT]": 52001,
4
+ "[DOMAIN_BER]": 52002,
5
+ "[DOMAIN_CBP]": 52014,
6
+ "[DOMAIN_ENR]": 52003,
7
+ "[DOMAIN_ETN]": 52004,
8
+ "[DOMAIN_FAC]": 52005,
9
+ "[DOMAIN_FML]": 52016,
10
+ "[DOMAIN_HLC]": 52010,
11
+ "[DOMAIN_HRN]": 52011,
12
+ "[DOMAIN_HSP]": 52013,
13
+ "[DOMAIN_INS]": 52006,
14
+ "[DOMAIN_MAE]": 52015,
15
+ "[DOMAIN_MBW]": 52007,
16
+ "[DOMAIN_SLP]": 52009,
17
+ "[DOMAIN_SOP]": 52012,
18
+ "[DOMAIN_STM]": 52008
19
+ }
config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "LABEL_0"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "LABEL_0": 0
20
+ },
21
+ "layer_norm_eps": 1e-12,
22
+ "max_position_embeddings": 514,
23
+ "model_type": "roberta",
24
+ "num_attention_heads": 12,
25
+ "num_hidden_layers": 12,
26
+ "pad_token_id": 1,
27
+ "position_embedding_type": "absolute",
28
+ "problem_type": "regression",
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.52.4",
31
+ "type_vocab_size": 1,
32
+ "use_cache": true,
33
+ "vocab_size": 52017
34
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d5052b6ab036a0443d1797f8907d974eaf16415f67ac0b5178b19fe597a7148
3
+ size 503991892
model_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "../models/levels_all_tokens_sents_combined_all/best_model/", "cache_dir": "../models/cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 100, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 10, "loss_type": null, "loss_args": {}, "manual_seed": 19, "max_grad_norm": 1.0, "max_seq_length": 512, "model_name": "CLTL/MedRoBERTa.nl", "model_type": "roberta", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 1, "optimizer": "AdamW", "output_dir": "../models/levels_all_tokens_sents_combined_all/", "overwrite_output_dir": false, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 50, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": "../models/runs/", "thread_count": null, "tokenizer_name": "CLTL/MedRoBERTa.nl", 
"tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "trust_remote_code": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": false, "use_multiprocessing_for_evaluation": false, "wandb_kwargs": {"name": "default"}, "wandb_project": "levels_all_tokens_sents_combined_all", "warmup_ratio": 0.06, "warmup_steps": 188, "weight_decay": 0.0, "model_class": "ClassificationModel", "labels_list": [0], "labels_map": {}, "lazy_delimiter": "\t", "lazy_labels_column": 1, "lazy_loading": false, "lazy_loading_start_line": 1, "lazy_text_a_column": null, "lazy_text_b_column": null, "lazy_text_column": 0, "onnx": false, "regression": true, "sliding_window": false, "special_tokens_list": [], "stride": 0.8, "tie_value": 1}
special_tokens_map.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "[DOMAIN_ADM]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "[DOMAIN_ATT]",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "[DOMAIN_BER]",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "[DOMAIN_ENR]",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "[DOMAIN_ETN]",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "[DOMAIN_FAC]",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "[DOMAIN_INS]",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "[DOMAIN_MBW]",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "[DOMAIN_STM]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "[DOMAIN_SLP]",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "[DOMAIN_HLC]",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "[DOMAIN_HRN]",
82
+ "lstrip": false,
83
+ "normalized": false,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "[DOMAIN_SOP]",
89
+ "lstrip": false,
90
+ "normalized": false,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "[DOMAIN_HSP]",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "[DOMAIN_CBP]",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "[DOMAIN_MAE]",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "[DOMAIN_FML]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ }
122
+ ],
123
+ "bos_token": {
124
+ "content": "<s>",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false
129
+ },
130
+ "cls_token": {
131
+ "content": "<s>",
132
+ "lstrip": false,
133
+ "normalized": true,
134
+ "rstrip": false,
135
+ "single_word": false
136
+ },
137
+ "eos_token": {
138
+ "content": "</s>",
139
+ "lstrip": false,
140
+ "normalized": true,
141
+ "rstrip": false,
142
+ "single_word": false
143
+ },
144
+ "mask_token": {
145
+ "content": "<mask>",
146
+ "lstrip": true,
147
+ "normalized": false,
148
+ "rstrip": false,
149
+ "single_word": false
150
+ },
151
+ "pad_token": {
152
+ "content": "<pad>",
153
+ "lstrip": false,
154
+ "normalized": true,
155
+ "rstrip": false,
156
+ "single_word": false
157
+ },
158
+ "sep_token": {
159
+ "content": "</s>",
160
+ "lstrip": false,
161
+ "normalized": true,
162
+ "rstrip": false,
163
+ "single_word": false
164
+ },
165
+ "unk_token": {
166
+ "content": "<unk>",
167
+ "lstrip": false,
168
+ "normalized": true,
169
+ "rstrip": false,
170
+ "single_word": false
171
+ }
172
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "52000": {
45
+ "content": "[DOMAIN_ADM]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "52001": {
53
+ "content": "[DOMAIN_ATT]",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "52002": {
61
+ "content": "[DOMAIN_BER]",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "52003": {
69
+ "content": "[DOMAIN_ENR]",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "52004": {
77
+ "content": "[DOMAIN_ETN]",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "52005": {
85
+ "content": "[DOMAIN_FAC]",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "52006": {
93
+ "content": "[DOMAIN_INS]",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "52007": {
101
+ "content": "[DOMAIN_MBW]",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "52008": {
109
+ "content": "[DOMAIN_STM]",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "52009": {
117
+ "content": "[DOMAIN_SLP]",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "52010": {
125
+ "content": "[DOMAIN_HLC]",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "52011": {
133
+ "content": "[DOMAIN_HRN]",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "52012": {
141
+ "content": "[DOMAIN_SOP]",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "52013": {
149
+ "content": "[DOMAIN_HSP]",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "52014": {
157
+ "content": "[DOMAIN_CBP]",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "52015": {
165
+ "content": "[DOMAIN_MAE]",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "52016": {
173
+ "content": "[DOMAIN_FML]",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ }
180
+ },
181
+ "additional_special_tokens": [
182
+ "[DOMAIN_ADM]",
183
+ "[DOMAIN_ATT]",
184
+ "[DOMAIN_BER]",
185
+ "[DOMAIN_ENR]",
186
+ "[DOMAIN_ETN]",
187
+ "[DOMAIN_FAC]",
188
+ "[DOMAIN_INS]",
189
+ "[DOMAIN_MBW]",
190
+ "[DOMAIN_STM]",
191
+ "[DOMAIN_SLP]",
192
+ "[DOMAIN_HLC]",
193
+ "[DOMAIN_HRN]",
194
+ "[DOMAIN_SOP]",
195
+ "[DOMAIN_HSP]",
196
+ "[DOMAIN_CBP]",
197
+ "[DOMAIN_MAE]",
198
+ "[DOMAIN_FML]"
199
+ ],
200
+ "bos_token": "<s>",
201
+ "clean_up_tokenization_spaces": false,
202
+ "cls_token": "<s>",
203
+ "eos_token": "</s>",
204
+ "errors": "replace",
205
+ "extra_special_tokens": {},
206
+ "mask_token": "<mask>",
207
+ "model_max_length": 1000000000000000019884624838656,
208
+ "pad_token": "<pad>",
209
+ "sep_token": "</s>",
210
+ "tokenizer_class": "RobertaTokenizer",
211
+ "trim_offsets": true,
212
+ "unk_token": "<unk>"
213
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9744598b8e051439ee840a578182efd06df84a1c335d377a4a00d0ad440c1103
3
+ size 4241
vocab.json ADDED
The diff for this file is too large to render. See raw diff