FILM6912 committed on
Commit
705ca7b
·
verified ·
1 Parent(s): 379a4f5

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "FILM6912/whisper-tiny-thai",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
@@ -16,7 +15,7 @@
16
  "decoder_layerdrop": 0.0,
17
  "decoder_layers": 4,
18
  "decoder_start_token_id": 50258,
19
- "dropout": 0.0,
20
  "encoder_attention_heads": 6,
21
  "encoder_ffn_dim": 1536,
22
  "encoder_layerdrop": 0.0,
@@ -54,7 +53,8 @@
54
  "pad_token_id": 50257,
55
  "scale_embedding": false,
56
  "torch_dtype": "bfloat16",
57
- "transformers_version": "4.46.3",
 
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
60
  "vocab_size": 51865
 
1
  {
 
2
  "activation_dropout": 0.0,
3
  "activation_function": "gelu",
4
  "apply_spec_augment": false,
 
15
  "decoder_layerdrop": 0.0,
16
  "decoder_layers": 4,
17
  "decoder_start_token_id": 50258,
18
+ "dropout": 0.1,
19
  "encoder_attention_heads": 6,
20
  "encoder_ffn_dim": 1536,
21
  "encoder_layerdrop": 0.0,
 
53
  "pad_token_id": 50257,
54
  "scale_embedding": false,
55
  "torch_dtype": "bfloat16",
56
+ "transformers_version": "4.52.1",
57
+ "unsloth_version": "2025.6.8",
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
60
  "vocab_size": 51865
generation_config.json CHANGED
@@ -25,6 +25,7 @@
25
  5
26
  ]
27
  ],
 
28
  "begin_suppress_tokens": [
29
  220,
30
  50257
@@ -134,107 +135,18 @@
134
  "<|yo|>": 50325,
135
  "<|zh|>": 50260
136
  },
137
- "language": "Thai",
138
  "max_initial_timestamp_index": 50,
139
  "max_length": 448,
140
  "no_timestamps_token_id": 50363,
141
  "pad_token_id": 50257,
142
  "prev_sot_token_id": 50361,
143
  "return_timestamps": false,
144
- "suppress_tokens": [
145
- 1,
146
- 2,
147
- 7,
148
- 8,
149
- 9,
150
- 10,
151
- 14,
152
- 25,
153
- 26,
154
- 27,
155
- 28,
156
- 29,
157
- 31,
158
- 58,
159
- 59,
160
- 60,
161
- 61,
162
- 62,
163
- 63,
164
- 90,
165
- 91,
166
- 92,
167
- 93,
168
- 359,
169
- 503,
170
- 522,
171
- 542,
172
- 873,
173
- 893,
174
- 902,
175
- 918,
176
- 922,
177
- 931,
178
- 1350,
179
- 1853,
180
- 1982,
181
- 2460,
182
- 2627,
183
- 3246,
184
- 3253,
185
- 3268,
186
- 3536,
187
- 3846,
188
- 3961,
189
- 4183,
190
- 4667,
191
- 6585,
192
- 6647,
193
- 7273,
194
- 9061,
195
- 9383,
196
- 10428,
197
- 10929,
198
- 11938,
199
- 12033,
200
- 12331,
201
- 12562,
202
- 13793,
203
- 14157,
204
- 14635,
205
- 15265,
206
- 15618,
207
- 16553,
208
- 16604,
209
- 18362,
210
- 18956,
211
- 20075,
212
- 21675,
213
- 22520,
214
- 26130,
215
- 26161,
216
- 26435,
217
- 28279,
218
- 29464,
219
- 31650,
220
- 32302,
221
- 32470,
222
- 36865,
223
- 42863,
224
- 47425,
225
- 49870,
226
- 50254,
227
- 50258,
228
- 50358,
229
- 50359,
230
- 50360,
231
- 50361,
232
- 50362
233
- ],
234
  "task": "transcribe",
235
  "task_to_id": {
236
  "transcribe": 50359,
237
  "translate": 50358
238
  },
239
- "transformers_version": "4.46.3"
240
  }
 
25
  5
26
  ]
27
  ],
28
+ "attn_implementation": "sdpa",
29
  "begin_suppress_tokens": [
30
  220,
31
  50257
 
135
  "<|yo|>": 50325,
136
  "<|zh|>": 50260
137
  },
138
+ "language": null,
139
  "max_initial_timestamp_index": 50,
140
  "max_length": 448,
141
  "no_timestamps_token_id": 50363,
142
  "pad_token_id": 50257,
143
  "prev_sot_token_id": 50361,
144
  "return_timestamps": false,
145
+ "suppress_tokens": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  "task": "transcribe",
147
  "task_to_id": {
148
  "transcribe": 50359,
149
  "translate": 50358
150
  },
151
+ "transformers_version": "4.52.1"
152
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aa77382375181d5daa73ceffd3829403a1a89f2e646d3c9e4b40eb165fbfd42
3
- size 75540336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a583c6617d383cffc1c4c678640e271ee3082de7d5c1e816f267427c51911331
3
+ size 143983808
preprocessor_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "chunk_length": 30,
 
3
  "feature_extractor_type": "WhisperFeatureExtractor",
4
  "feature_size": 80,
5
  "hop_length": 160,
 
1
  {
2
  "chunk_length": 30,
3
+ "dither": 0.0,
4
  "feature_extractor_type": "WhisperFeatureExtractor",
5
  "feature_size": 80,
6
  "hop_length": 160,
special_tokens_map.json CHANGED
@@ -122,13 +122,7 @@
122
  "rstrip": false,
123
  "single_word": false
124
  },
125
- "pad_token": {
126
- "content": "<|endoftext|>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false
131
- },
132
  "unk_token": {
133
  "content": "<|endoftext|>",
134
  "lstrip": false,
 
122
  "rstrip": false,
123
  "single_word": false
124
  },
125
+ "pad_token": "<|endoftext|>",
 
 
 
 
 
 
126
  "unk_token": {
127
  "content": "<|endoftext|>",
128
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -12980,8 +12980,10 @@
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
 
12983
  "model_max_length": 1024,
12984
  "pad_token": "<|endoftext|>",
 
12985
  "processor_class": "WhisperProcessor",
12986
  "return_attention_mask": false,
12987
  "tokenizer_class": "WhisperTokenizer",
 
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
12983
+ "extra_special_tokens": {},
12984
  "model_max_length": 1024,
12985
  "pad_token": "<|endoftext|>",
12986
+ "padding_side": "right",
12987
  "processor_class": "WhisperProcessor",
12988
  "return_attention_mask": false,
12989
  "tokenizer_class": "WhisperTokenizer",