devanshty commited on
Commit
08ebf75
·
verified ·
1 Parent(s): be060b6

Upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +127 -0
tokenizer_config.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<|endoftext|>",
5
+ "clean_up_tokenization_spaces": true,
6
+ "eos_token": "<|endoftext|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|startoftranscript|>",
10
+ "<|en|>",
11
+ "<|zh|>",
12
+ "<|de|>",
13
+ "<|es|>",
14
+ "<|ru|>",
15
+ "<|ko|>",
16
+ "<|fr|>",
17
+ "<|ja|>",
18
+ "<|pt|>",
19
+ "<|tr|>",
20
+ "<|pl|>",
21
+ "<|ca|>",
22
+ "<|nl|>",
23
+ "<|ar|>",
24
+ "<|sv|>",
25
+ "<|it|>",
26
+ "<|id|>",
27
+ "<|hi|>",
28
+ "<|fi|>",
29
+ "<|vi|>",
30
+ "<|he|>",
31
+ "<|uk|>",
32
+ "<|el|>",
33
+ "<|ms|>",
34
+ "<|cs|>",
35
+ "<|ro|>",
36
+ "<|da|>",
37
+ "<|hu|>",
38
+ "<|ta|>",
39
+ "<|no|>",
40
+ "<|th|>",
41
+ "<|ur|>",
42
+ "<|hr|>",
43
+ "<|bg|>",
44
+ "<|lt|>",
45
+ "<|la|>",
46
+ "<|mi|>",
47
+ "<|ml|>",
48
+ "<|cy|>",
49
+ "<|sk|>",
50
+ "<|te|>",
51
+ "<|fa|>",
52
+ "<|lv|>",
53
+ "<|bn|>",
54
+ "<|sr|>",
55
+ "<|az|>",
56
+ "<|sl|>",
57
+ "<|kn|>",
58
+ "<|et|>",
59
+ "<|mk|>",
60
+ "<|br|>",
61
+ "<|eu|>",
62
+ "<|is|>",
63
+ "<|hy|>",
64
+ "<|ne|>",
65
+ "<|mn|>",
66
+ "<|bs|>",
67
+ "<|kk|>",
68
+ "<|sq|>",
69
+ "<|sw|>",
70
+ "<|gl|>",
71
+ "<|mr|>",
72
+ "<|pa|>",
73
+ "<|si|>",
74
+ "<|km|>",
75
+ "<|sn|>",
76
+ "<|yo|>",
77
+ "<|so|>",
78
+ "<|af|>",
79
+ "<|oc|>",
80
+ "<|ka|>",
81
+ "<|be|>",
82
+ "<|tg|>",
83
+ "<|sd|>",
84
+ "<|gu|>",
85
+ "<|am|>",
86
+ "<|yi|>",
87
+ "<|lo|>",
88
+ "<|uz|>",
89
+ "<|fo|>",
90
+ "<|ht|>",
91
+ "<|ps|>",
92
+ "<|tk|>",
93
+ "<|nn|>",
94
+ "<|mt|>",
95
+ "<|sa|>",
96
+ "<|lb|>",
97
+ "<|my|>",
98
+ "<|bo|>",
99
+ "<|tl|>",
100
+ "<|mg|>",
101
+ "<|as|>",
102
+ "<|tt|>",
103
+ "<|haw|>",
104
+ "<|ln|>",
105
+ "<|ha|>",
106
+ "<|ba|>",
107
+ "<|jw|>",
108
+ "<|su|>",
109
+ "<|yue|>",
110
+ "<|translate|>",
111
+ "<|transcribe|>",
112
+ "<|startoflm|>",
113
+ "<|startofprev|>",
114
+ "<|nospeech|>",
115
+ "<|notimestamps|>"
116
+ ],
117
+ "is_local": false,
118
+ "language": "hi",
119
+ "local_files_only": false,
120
+ "model_max_length": 1000000000000000019884624838656,
121
+ "pad_token": "<|endoftext|>",
122
+ "predict_timestamps": false,
123
+ "processor_class": "WhisperProcessor",
124
+ "task": "transcribe",
125
+ "tokenizer_class": "WhisperTokenizer",
126
+ "unk_token": "<|endoftext|>"
127
+ }