lgrobol committed on
Commit
f7fb1f1
·
1 Parent(s): 9a5edc1

make vocabulary minuscule

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "lgrobol/mbart-minuscule",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
@@ -45,7 +45,7 @@
45
  "normalize_before": true,
46
  "normalize_embedding": true,
47
  "num_beams": 5,
48
- "num_hidden_layers": 12,
49
  "output_past": true,
50
  "pad_token_id": 1,
51
  "scale_embedding": true,
@@ -54,5 +54,5 @@
54
  "torch_dtype": "float32",
55
  "transformers_version": "4.26.1",
56
  "use_cache": true,
57
- "vocab_size": 250054
58
  }
 
1
  {
2
+ "_name_or_path": ".",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
 
45
  "normalize_before": true,
46
  "normalize_embedding": true,
47
  "num_beams": 5,
48
+ "num_hidden_layers": 2,
49
  "output_past": true,
50
  "pad_token_id": 1,
51
  "scale_embedding": true,
 
54
  "torch_dtype": "float32",
55
  "transformers_version": "4.26.1",
56
  "use_cache": true,
57
+ "vocab_size": 256
58
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e21faf1a546468a8ff280281929734dde7244435145c040228bd5edb28597db
3
- size 131977105
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8abee1dcce4f5d4bed804f0447677245670384b90edcc733ba7a952be54bda
3
+ size 3081361
special_tokens_map.json CHANGED
@@ -53,10 +53,15 @@
53
  "gl_ES",
54
  "sl_SI"
55
  ],
56
- "bos_token": "<s>",
57
  "cls_token": "<s>",
58
  "eos_token": "</s>",
59
- "mask_token": "<mask>",
 
 
 
 
 
 
60
  "pad_token": "<pad>",
61
  "sep_token": "</s>",
62
  "unk_token": "<unk>"
 
53
  "gl_ES",
54
  "sl_SI"
55
  ],
 
56
  "cls_token": "<s>",
57
  "eos_token": "</s>",
58
+ "mask_token": {
59
+ "content": "<mask>",
60
+ "lstrip": true,
61
+ "normalized": true,
62
+ "rstrip": false,
63
+ "single_word": false
64
+ },
65
  "pad_token": "<pad>",
66
  "sep_token": "</s>",
67
  "unk_token": "<unk>"
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d58a68c276b56fcc48c165c63f70e5e4d452b4182032a5f7a2d018f4aa1a889
3
- size 17109752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db4384eee58d54cf837101d35791567ebe1720e98e96b4e7c4a439af7ef4a0b7
3
+ size 18600
tokenizer_config.json CHANGED
@@ -53,10 +53,8 @@
53
  "gl_ES",
54
  "sl_SI"
55
  ],
56
- "bos_token": "<s>",
57
  "cls_token": "<s>",
58
  "eos_token": "</s>",
59
- "language_codes": "ML50",
60
  "mask_token": {
61
  "__type": "AddedToken",
62
  "content": "<mask>",
@@ -66,11 +64,8 @@
66
  "single_word": false
67
  },
68
  "model_max_length": 1000000000000000019884624838656,
69
- "name_or_path": "facebook/mbart-large-50-many-to-many-mmt",
70
  "pad_token": "<pad>",
71
  "sep_token": "</s>",
72
- "sp_model_kwargs": {},
73
- "special_tokens_map_file": "/home/suraj/projects/mbart-50/mbart-50/special_tokens_map.json",
74
  "src_lang": null,
75
  "tgt_lang": null,
76
  "tokenizer_class": "MBart50Tokenizer",
 
53
  "gl_ES",
54
  "sl_SI"
55
  ],
 
56
  "cls_token": "<s>",
57
  "eos_token": "</s>",
 
58
  "mask_token": {
59
  "__type": "AddedToken",
60
  "content": "<mask>",
 
64
  "single_word": false
65
  },
66
  "model_max_length": 1000000000000000019884624838656,
 
67
  "pad_token": "<pad>",
68
  "sep_token": "</s>",
 
 
69
  "src_lang": null,
70
  "tgt_lang": null,
71
  "tokenizer_class": "MBart50Tokenizer",