# whisper-small-fa / model_config.yaml
# arxyzan's picture
# Update model_config.yaml
# 17111ae verified
# Top-level model settings for the `whisper_speech_recognition` config.
# Keys are grouped by topic; mapping order carries no meaning.

# Identity of this configuration entry.
name: whisper_speech_recognition
config_type: model

# Input features and vocabulary.
vocab_size: 51865
num_mel_bins: 80
sampling_rate: 16000

# Encoder/decoder shape.
is_encoder_decoder: true
d_model: 768
num_hidden_layers: 12
encoder_layers: 12
encoder_attention_heads: 12
encoder_ffn_dim: 3072
decoder_layers: 12
decoder_attention_heads: 12
decoder_ffn_dim: 3072
max_source_positions: 1500
max_target_positions: 448
activation_function: gelu
scale_embedding: false
init_std: 0.02
torch_dtype: float32

# Dropout-style rates (every one is 0.0 in this file).
dropout: 0.0
attention_dropout: 0.0
activation_dropout: 0.0
encoder_layerdrop: 0.0
decoder_layerdrop: 0.0

# Special token ids: pad, bos and eos all share 50257.
pad_token_id: 50257
bos_token_id: 50257
eos_token_id: 50257
decoder_start_token_id: 50258

# Token suppression and decoding limits.
suppress_tokens: []
begin_suppress_tokens: [220, 50257]
use_cache: false
max_new_tokens: 444

# Weighted-layer-sum and classifier projection settings.
use_weighted_layer_sum: false
classifier_proj_size: 256

# Spec-augment masking parameters.
# NOTE(review): presumably unused while apply_spec_augment is false;
# confirm against the consumer.
apply_spec_augment: false
mask_time_prob: 0.05
mask_time_length: 10
mask_time_min_masks: 2
mask_feature_prob: 0.0
mask_feature_length: 10
mask_feature_min_masks: 0
# Generation settings. Everything below must stay nested under
# `generation_config`: written flush-left, these keys become top-level
# entries that duplicate (and override) the model-level pad/bos/eos/
# suppress settings and leave `generation_config` itself null.
generation_config:
  alignment_heads: null
  # 50257 is this file's eos_token_id; kept identical to the model-level
  # begin_suppress_tokens list.
  begin_suppress_tokens: [220, 50257]
  bos_token_id: 50257
  decoder_start_token_id: 50258
  eos_token_id: 50257
  is_multilingual: true
  task: transcribe
  language: fa
  # Language tag -> token id.
  lang_to_id:
    <|af|>: 50327
    <|am|>: 50334
    <|ar|>: 50272
    <|as|>: 50350
    <|az|>: 50304
    <|ba|>: 50355
    <|be|>: 50330
    <|bg|>: 50292
    <|bn|>: 50302
    <|bo|>: 50347
    <|br|>: 50309
    <|bs|>: 50315
    <|ca|>: 50270
    <|cs|>: 50283
    <|cy|>: 50297
    <|da|>: 50285
    <|de|>: 50261
    <|el|>: 50281
    <|en|>: 50259
    <|es|>: 50262
    <|et|>: 50307
    <|eu|>: 50310
    <|fa|>: 50300
    <|fi|>: 50277
    <|fo|>: 50338
    <|fr|>: 50265
    <|gl|>: 50319
    <|gu|>: 50333
    <|haw|>: 50352
    <|ha|>: 50354
    <|he|>: 50279
    <|hi|>: 50276
    <|hr|>: 50291
    <|ht|>: 50339
    <|hu|>: 50286
    <|hy|>: 50312
    <|id|>: 50275
    <|is|>: 50311
    <|it|>: 50274
    <|ja|>: 50266
    <|jw|>: 50356
    <|ka|>: 50329
    <|kk|>: 50316
    <|km|>: 50323
    <|kn|>: 50306
    <|ko|>: 50264
    <|la|>: 50294
    <|lb|>: 50345
    <|ln|>: 50353
    <|lo|>: 50336
    <|lt|>: 50293
    <|lv|>: 50301
    <|mg|>: 50349
    <|mi|>: 50295
    <|mk|>: 50308
    <|ml|>: 50296
    <|mn|>: 50314
    <|mr|>: 50320
    <|ms|>: 50282
    <|mt|>: 50343
    <|my|>: 50346
    <|ne|>: 50313
    <|nl|>: 50271
    <|nn|>: 50342
    <|no|>: 50288
    <|oc|>: 50328
    <|pa|>: 50321
    <|pl|>: 50269
    <|ps|>: 50340
    <|pt|>: 50267
    <|ro|>: 50284
    <|ru|>: 50263
    <|sa|>: 50344
    <|sd|>: 50332
    <|si|>: 50322
    <|sk|>: 50298
    <|sl|>: 50305
    <|sn|>: 50324
    <|so|>: 50326
    <|sq|>: 50317
    <|sr|>: 50303
    <|su|>: 50357
    <|sv|>: 50273
    <|sw|>: 50318
    <|ta|>: 50287
    <|te|>: 50299
    <|tg|>: 50331
    <|th|>: 50289
    <|tk|>: 50341
    <|tl|>: 50348
    <|tr|>: 50268
    <|tt|>: 50351
    <|uk|>: 50280
    <|ur|>: 50290
    <|uz|>: 50337
    <|vi|>: 50278
    <|yi|>: 50335
    <|yo|>: 50325
    <|zh|>: 50260
  max_initial_timestamp_index: 50
  max_length: 448
  no_timestamps_token_id: 50363
  pad_token_id: 50257
  prev_sot_token_id: 50361
  return_timestamps: false
  suppress_tokens: null
  # Task name -> token id.
  task_to_id:
    transcribe: 50359
    translate: 50358