FILM6912 committed on
Commit
465d79f
·
verified ·
1 Parent(s): 2c785d8

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,8 +1,7 @@
1
  {
2
- "_name_or_path": "whisper-small-thai-fin",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
- "apply_spec_augment": true,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
@@ -22,15 +21,28 @@
22
  "encoder_layerdrop": 0.0,
23
  "encoder_layers": 12,
24
  "eos_token_id": 50257,
25
- "forced_decoder_ids": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "init_std": 0.02,
27
  "is_encoder_decoder": true,
28
- "mask_feature_length": 64,
29
  "mask_feature_min_masks": 0,
30
- "mask_feature_prob": 1e-05,
31
  "mask_time_length": 10,
32
  "mask_time_min_masks": 2,
33
- "mask_time_prob": 1e-05,
34
  "max_length": null,
35
  "max_source_positions": 1500,
36
  "max_target_positions": 448,
@@ -41,7 +53,8 @@
41
  "pad_token_id": 50257,
42
  "scale_embedding": false,
43
  "torch_dtype": "bfloat16",
44
- "transformers_version": "4.46.3",
 
45
  "use_cache": true,
46
  "use_weighted_layer_sum": false,
47
  "vocab_size": 51865
 
1
  {
 
2
  "activation_dropout": 0.0,
3
  "activation_function": "gelu",
4
+ "apply_spec_augment": false,
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
 
21
  "encoder_layerdrop": 0.0,
22
  "encoder_layers": 12,
23
  "eos_token_id": 50257,
24
+ "forced_decoder_ids": [
25
+ [
26
+ 1,
27
+ 50259
28
+ ],
29
+ [
30
+ 2,
31
+ 50359
32
+ ],
33
+ [
34
+ 3,
35
+ 50363
36
+ ]
37
+ ],
38
  "init_std": 0.02,
39
  "is_encoder_decoder": true,
40
+ "mask_feature_length": 10,
41
  "mask_feature_min_masks": 0,
42
+ "mask_feature_prob": 0.0,
43
  "mask_time_length": 10,
44
  "mask_time_min_masks": 2,
45
+ "mask_time_prob": 0.05,
46
  "max_length": null,
47
  "max_source_positions": 1500,
48
  "max_target_positions": 448,
 
53
  "pad_token_id": 50257,
54
  "scale_embedding": false,
55
  "torch_dtype": "bfloat16",
56
+ "transformers_version": "4.51.3",
57
+ "unsloth_version": "2025.5.6",
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
60
  "vocab_size": 51865
generation_config.json CHANGED
@@ -41,6 +41,7 @@
41
  5
42
  ]
43
  ],
 
44
  "begin_suppress_tokens": [
45
  220,
46
  50257
@@ -150,107 +151,18 @@
150
  "<|yo|>": 50325,
151
  "<|zh|>": 50260
152
  },
153
- "language": "Thai",
154
  "max_initial_timestamp_index": 50,
155
  "max_length": 448,
156
  "no_timestamps_token_id": 50363,
157
  "pad_token_id": 50257,
158
  "prev_sot_token_id": 50361,
159
  "return_timestamps": false,
160
- "suppress_tokens": [
161
- 1,
162
- 2,
163
- 7,
164
- 8,
165
- 9,
166
- 10,
167
- 14,
168
- 25,
169
- 26,
170
- 27,
171
- 28,
172
- 29,
173
- 31,
174
- 58,
175
- 59,
176
- 60,
177
- 61,
178
- 62,
179
- 63,
180
- 90,
181
- 91,
182
- 92,
183
- 93,
184
- 359,
185
- 503,
186
- 522,
187
- 542,
188
- 873,
189
- 893,
190
- 902,
191
- 918,
192
- 922,
193
- 931,
194
- 1350,
195
- 1853,
196
- 1982,
197
- 2460,
198
- 2627,
199
- 3246,
200
- 3253,
201
- 3268,
202
- 3536,
203
- 3846,
204
- 3961,
205
- 4183,
206
- 4667,
207
- 6585,
208
- 6647,
209
- 7273,
210
- 9061,
211
- 9383,
212
- 10428,
213
- 10929,
214
- 11938,
215
- 12033,
216
- 12331,
217
- 12562,
218
- 13793,
219
- 14157,
220
- 14635,
221
- 15265,
222
- 15618,
223
- 16553,
224
- 16604,
225
- 18362,
226
- 18956,
227
- 20075,
228
- 21675,
229
- 22520,
230
- 26130,
231
- 26161,
232
- 26435,
233
- 28279,
234
- 29464,
235
- 31650,
236
- 32302,
237
- 32470,
238
- 36865,
239
- 42863,
240
- 47425,
241
- 49870,
242
- 50254,
243
- 50258,
244
- 50358,
245
- 50359,
246
- 50360,
247
- 50361,
248
- 50362
249
- ],
250
  "task": "transcribe",
251
  "task_to_id": {
252
  "transcribe": 50359,
253
  "translate": 50358
254
  },
255
- "transformers_version": "4.46.3"
256
  }
 
41
  5
42
  ]
43
  ],
44
+ "attn_implementation": "sdpa",
45
  "begin_suppress_tokens": [
46
  220,
47
  50257
 
151
  "<|yo|>": 50325,
152
  "<|zh|>": 50260
153
  },
154
+ "language": "<|th|>",
155
  "max_initial_timestamp_index": 50,
156
  "max_length": 448,
157
  "no_timestamps_token_id": 50363,
158
  "pad_token_id": 50257,
159
  "prev_sot_token_id": 50361,
160
  "return_timestamps": false,
161
+ "suppress_tokens": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  "task": "transcribe",
163
  "task_to_id": {
164
  "transcribe": 50359,
165
  "translate": 50358
166
  },
167
+ "transformers_version": "4.51.3"
168
  }
merges.txt CHANGED
@@ -1,4 +1,5 @@
1
  #version: 0.2
 
2
  Ġ a
3
  Ġt h
4
  i n
 
1
  #version: 0.2
2
+ Ġ t
3
  Ġ a
4
  Ġt h
5
  i n
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84a4fafcfec0026e0123833f8a440220f1f6433cc3c56002565d4aae900cbca1
3
- size 483525680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5bcef43669348f1e68caee216f899dcf0117c868d7583799685b0f81473f282
3
+ size 563189904
preprocessor_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "chunk_length": 30,
 
3
  "feature_extractor_type": "WhisperFeatureExtractor",
4
  "feature_size": 80,
5
  "hop_length": 160,
 
1
  {
2
  "chunk_length": 30,
3
+ "dither": 0.0,
4
  "feature_extractor_type": "WhisperFeatureExtractor",
5
  "feature_size": 80,
6
  "hop_length": 160,
special_tokens_map.json CHANGED
@@ -122,13 +122,7 @@
122
  "rstrip": false,
123
  "single_word": false
124
  },
125
- "pad_token": {
126
- "content": "<|endoftext|>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false
131
- },
132
  "unk_token": {
133
  "content": "<|endoftext|>",
134
  "lstrip": false,
 
122
  "rstrip": false,
123
  "single_word": false
124
  },
125
+ "pad_token": "<|endoftext|>",
 
 
 
 
 
 
126
  "unk_token": {
127
  "content": "<|endoftext|>",
128
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -12980,8 +12980,10 @@
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
 
12983
  "model_max_length": 1024,
12984
  "pad_token": "<|endoftext|>",
 
12985
  "processor_class": "WhisperProcessor",
12986
  "return_attention_mask": false,
12987
  "tokenizer_class": "WhisperTokenizer",
 
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
12983
+ "extra_special_tokens": {},
12984
  "model_max_length": 1024,
12985
  "pad_token": "<|endoftext|>",
12986
+ "padding_side": "right",
12987
  "processor_class": "WhisperProcessor",
12988
  "return_attention_mask": false,
12989
  "tokenizer_class": "WhisperTokenizer",