takuM23 committed on
Commit
c306f26
·
verified ·
1 Parent(s): 8e52e0f

Delete ngwanda-tokenizer-hf

Browse files
ngwanda-tokenizer-hf/tokenizer.json DELETED
@@ -1,134 +0,0 @@
1
- {
2
- "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
- "added_tokens": [
6
- {
7
- "id": 0,
8
- "content": "<unk>",
9
- "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
- },
15
- {
16
- "id": 1,
17
- "content": "<s>",
18
- "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
- },
24
- {
25
- "id": 2,
26
- "content": "</s>",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- },
33
- {
34
- "id": 3,
35
- "content": "<|endofturn|>",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
- },
42
- {
43
- "id": 4,
44
- "content": "<|startoftext|>",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
- },
51
- {
52
- "id": 5,
53
- "content": "<pad>",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
- }
60
- ],
61
- "normalizer": null,
62
- "pre_tokenizer": {
63
- "type": "Metaspace",
64
- "replacement": "▁",
65
- "prepend_scheme": "first",
66
- "split": false
67
- },
68
- "post_processor": {
69
- "type": "TemplateProcessing",
70
- "single": [
71
- {
72
- "Sequence": {
73
- "id": "A",
74
- "type_id": 0
75
- }
76
- }
77
- ],
78
- "pair": [
79
- {
80
- "Sequence": {
81
- "id": "A",
82
- "type_id": 0
83
- }
84
- },
85
- {
86
- "Sequence": {
87
- "id": "B",
88
- "type_id": 1
89
- }
90
- }
91
- ],
92
- "special_tokens": {}
93
- },
94
- "decoder": {
95
- "type": "Sequence",
96
- "decoders": [
97
- {
98
- "type": "Replace",
99
- "pattern": {
100
- "String": "▁"
101
- },
102
- "content": " "
103
- },
104
- {
105
- "type": "ByteFallback"
106
- },
107
- {
108
- "type": "Fuse"
109
- },
110
- {
111
- "type": "Strip",
112
- "content": " ",
113
- "start": 1,
114
- "stop": 0
115
- }
116
- ]
117
- },
118
- "model": {
119
- "type": "BPE",
120
- "dropout": null,
121
- "unk_token": null,
122
- "continuing_subword_prefix": null,
123
- "end_of_word_suffix": null,
124
- "fuse_unk": true,
125
- "byte_fallback": true,
126
- "ignore_merges": false,
127
- "vocab": {
128
- "<unk>": 0,
129
- "<s>": 1,
130
- "</s>": 2
131
- },
132
- "merges": []
133
- }
134
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ngwanda-tokenizer-hf/tokenizer_config.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "add_prefix_space": null,
3
- "backend": "tokenizers",
4
- "bos_token": "<|startoftext|>",
5
- "clean_up_tokenization_spaces": false,
6
- "eos_token": "<|endofturn|>",
7
- "model_max_length": 1000000000000000019884624838656,
8
- "pad_token": "<pad>",
9
- "tokenizer_class": "LlamaTokenizer",
10
- "unk_token": "<unk>",
11
- "use_default_system_prompt": false
12
- }