Upload 6 files
Browse files- __init__.py +0 -0
- config.json +17 -0
- merges.txt +0 -0
- special_tokens_map.json +7 -0
- tokenizer_config.json +11 -0
- vocab.json +0 -0
__init__.py
ADDED
|
File without changes
|
config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": ["SmallTransformer"],
|
| 3 |
+
"model_type": "small_transformer",
|
| 4 |
+
"vocab_size": 80000,
|
| 5 |
+
"d_model": 256,
|
| 6 |
+
"nhead": 8,
|
| 7 |
+
"num_encoder_layers": 3,
|
| 8 |
+
"num_decoder_layers": 3,
|
| 9 |
+
"dim_feedforward": 512,
|
| 10 |
+
"dropout": 0.1,
|
| 11 |
+
"pad_token_id": 0,
|
| 12 |
+
"bos_token_id": 1,
|
| 13 |
+
"eos_token_id": 2,
|
| 14 |
+
"max_position_embeddings": 512,
|
| 15 |
+
"torch_dtype": "float32",
|
| 16 |
+
"transformers_version": "4.36.0"
|
| 17 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"eos_token": "</s>",
|
| 4 |
+
"pad_token": "<pad>",
|
| 5 |
+
"unk_token": "<unk>",
|
| 6 |
+
"mask_token": "<mask>"
|
| 7 |
+
}
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"tokenizer_class": "ByteLevelBPETokenizer",
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"bos_token": "<s>",
|
| 5 |
+
"eos_token": "</s>",
|
| 6 |
+
"pad_token": "<pad>",
|
| 7 |
+
"unk_token": "<unk>",
|
| 8 |
+
"mask_token": "<mask>",
|
| 9 |
+
"additional_special_tokens": ["<hi>", "<bn>"],
|
| 10 |
+
"model_max_length": 64
|
| 11 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|