veton-berisha committed on
Commit
84ff9db
·
verified ·
1 Parent(s): 54a7133

New model – mse=0.1021

Browse files
config.json CHANGED
@@ -1,35 +1,23 @@
1
  {
2
  "architectures": [
3
- "XLMRobertaModel"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "bos_token_id": 0,
7
- "classifier_dropout": null,
8
  "eos_token_id": 2,
9
- "gradient_checkpointing": false,
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 768,
13
- "id2label": {
14
- "0": "LABEL_0"
15
- },
16
  "initializer_range": 0.02,
17
  "intermediate_size": 3072,
18
- "label2id": {
19
- "LABEL_0": 0
20
- },
21
  "layer_norm_eps": 1e-05,
22
  "max_position_embeddings": 514,
23
- "model_type": "xlm-roberta",
24
  "num_attention_heads": 12,
25
  "num_hidden_layers": 12,
26
- "output_past": true,
27
  "pad_token_id": 1,
28
- "position_embedding_type": "absolute",
29
- "problem_type": "regression",
30
  "torch_dtype": "float32",
31
  "transformers_version": "4.51.3",
32
- "type_vocab_size": 1,
33
- "use_cache": true,
34
- "vocab_size": 250002
35
  }
 
1
  {
2
  "architectures": [
3
+ "MPNetModel"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "bos_token_id": 0,
 
7
  "eos_token_id": 2,
 
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 768,
 
 
 
11
  "initializer_range": 0.02,
12
  "intermediate_size": 3072,
 
 
 
13
  "layer_norm_eps": 1e-05,
14
  "max_position_embeddings": 514,
15
+ "model_type": "mpnet",
16
  "num_attention_heads": 12,
17
  "num_hidden_layers": 12,
 
18
  "pad_token_id": 1,
19
+ "relative_attention_num_buckets": 32,
 
20
  "torch_dtype": "float32",
21
  "transformers_version": "4.51.3",
22
+ "vocab_size": 30527
 
 
23
  }
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "max_seq_length": 128,
3
  "do_lower_case": false
4
  }
 
1
  {
2
+ "max_seq_length": 256,
3
  "do_lower_case": false
4
  }
special_tokens_map.json CHANGED
@@ -42,7 +42,7 @@
42
  "single_word": false
43
  },
44
  "unk_token": {
45
- "content": "<unk>",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
 
42
  "single_word": false
43
  },
44
  "unk_token": {
45
+ "content": "[UNK]",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c088c06cf975b7097e469bd69630cdb0d675c6db1ce3af1042b6e19c6d01f22
3
- size 17082999
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ee64512c4fd9554e98781bbe75e2b5c6a08dfbd32a970e8da4d4c5aae059fec
3
+ size 710932
tokenizer_config.json CHANGED
@@ -27,12 +27,20 @@
27
  "3": {
28
  "content": "<unk>",
29
  "lstrip": false,
 
 
 
 
 
 
 
 
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
- "250001": {
36
  "content": "<mask>",
37
  "lstrip": true,
38
  "normalized": false,
@@ -44,19 +52,22 @@
44
  "bos_token": "<s>",
45
  "clean_up_tokenization_spaces": false,
46
  "cls_token": "<s>",
 
47
  "eos_token": "</s>",
48
  "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
  "max_length": 128,
51
- "model_max_length": 128,
52
  "pad_to_multiple_of": null,
53
  "pad_token": "<pad>",
54
  "pad_token_type_id": 0,
55
  "padding_side": "right",
56
  "sep_token": "</s>",
57
  "stride": 0,
58
- "tokenizer_class": "XLMRobertaTokenizer",
 
 
59
  "truncation_side": "right",
60
  "truncation_strategy": "longest_first",
61
- "unk_token": "<unk>"
62
  }
 
27
  "3": {
28
  "content": "<unk>",
29
  "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "104": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "30526": {
44
  "content": "<mask>",
45
  "lstrip": true,
46
  "normalized": false,
 
52
  "bos_token": "<s>",
53
  "clean_up_tokenization_spaces": false,
54
  "cls_token": "<s>",
55
+ "do_lower_case": true,
56
  "eos_token": "</s>",
57
  "extra_special_tokens": {},
58
  "mask_token": "<mask>",
59
  "max_length": 128,
60
+ "model_max_length": 256,
61
  "pad_to_multiple_of": null,
62
  "pad_token": "<pad>",
63
  "pad_token_type_id": 0,
64
  "padding_side": "right",
65
  "sep_token": "</s>",
66
  "stride": 0,
67
+ "strip_accents": null,
68
+ "tokenize_chinese_chars": true,
69
+ "tokenizer_class": "MPNetTokenizer",
70
  "truncation_side": "right",
71
  "truncation_strategy": "longest_first",
72
+ "unk_token": "[UNK]"
73
  }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff