| { |
| "save_data": null, |
| "share_vocab": true, |
| "tgt_vocab_size": 51866, |
| "n_sample": 0, |
| "decoder_start_token": "<|startoftranscript|>", |
| "tgt_vocab": null, |
| "pad_token": "<|endoftext|>", |
| "skip_empty_level": "silent", |
| "bos_token": "<|endoftext|>", |
| "src_vocab_size": 51866, |
| "vocab_size_multiple": 8, |
| "data": null, |
| "src_vocab": null, |
| "eos_token": "<|endoftext|>", |
| "transforms": [ |
| "huggingface_tokenize" |
| ], |
| "unk_token": "<|endoftext|>", |
| "training": { |
| "quant_type": "", |
| "compute_dtype": "torch.float16", |
| "w_bit": 0, |
| "batch_size": 896, |
| "normalization": "tokens", |
| "accum_count": [ |
| 32 |
| ], |
| "batch_type": "tokens", |
| "group_size": 0, |
| "batch_size_multiple": 1, |
| "accum_steps": [ |
| 0 |
| ], |
| "quant_layers": [], |
| "valid_batch_size": 256 |
| }, |
| "model": { |
| "heads_kv": 20, |
| "add_qkvbias": true, |
| "left_pad": false, |
| "attn_scaling": null, |
| "transformer_ff": 5120, |
| "heads": 20, |
| "first_k_dense_replace": 0, |
| "generator_bias": false, |
| "position_encoding_type": "Learned", |
| "no_timestamps_token_id": 50364, |
| "mlp_activation_fn": "gelu", |
| "sliding_window": 4096, |
| "layers": 32, |
| "huggingface_model": "openai/whisper-large-v3", |
| "architecture": "whisper", |
| "norm_eps": 1e-05, |
| "moe_transformer_ff": null, |
| "num_experts_per_tok": 0, |
| "layer_norm": "standard", |
| "num_shared_experts": 0, |
| "begin_suppress_tokens": [ |
| 220, |
| 50257 |
| ], |
| "shared_layer_norm": false, |
| "suppress_tokens": [ |
| 1, |
| 2, |
| 7, |
| 8, |
| 9, |
| 10, |
| 14, |
| 25, |
| 26, |
| 27, |
| 28, |
| 29, |
| 31, |
| 58, |
| 59, |
| 60, |
| 61, |
| 62, |
| 63, |
| 90, |
| 91, |
| 92, |
| 93, |
| 359, |
| 503, |
| 522, |
| 542, |
| 873, |
| 893, |
| 902, |
| 918, |
| 922, |
| 931, |
| 1350, |
| 1853, |
| 1982, |
| 2460, |
| 2627, |
| 3246, |
| 3253, |
| 3268, |
| 3536, |
| 3846, |
| 3961, |
| 4183, |
| 4667, |
| 6585, |
| 6647, |
| 7273, |
| 9061, |
| 9383, |
| 10428, |
| 10929, |
| 11938, |
| 12033, |
| 12331, |
| 12562, |
| 13793, |
| 14157, |
| 14635, |
| 15265, |
| 15618, |
| 16553, |
| 16604, |
| 18362, |
| 18956, |
| 20075, |
| 21675, |
| 22520, |
| 26130, |
| 26161, |
| 26435, |
| 28279, |
| 29464, |
| 31650, |
| 32302, |
| 32470, |
| 36865, |
| 42863, |
| 47425, |
| 49870, |
| 50254, |
| 50258, |
| 50359, |
| 50360, |
| 50361, |
| 50362, |
| 50363 |
| ], |
| "head_dim": null, |
| "hidden_size": 1280, |
| "share_decoder_embeddings": true, |
| "adapter_bias": false, |
| "word_timestamp_heads": [ |
| [ |
| 7, |
| 0 |
| ], |
| [ |
| 10, |
| 17 |
| ], |
| [ |
| 12, |
| 18 |
| ], |
| [ |
| 13, |
| 12 |
| ], |
| [ |
| 16, |
| 1 |
| ], |
| [ |
| 17, |
| 14 |
| ], |
| [ |
| 19, |
| 11 |
| ], |
| [ |
| 21, |
| 4 |
| ], |
| [ |
| 24, |
| 1 |
| ], |
| [ |
| 25, |
| 6 |
| ] |
| ], |
| "add_ffnbias": true, |
| "add_key_bias": false, |
| "parallel_residual": false, |
| "num_experts": 0, |
| "add_final_linear_bias": true, |
| "decoder": { |
| "heads_kv": 20, |
| "decoder_type": "transformer", |
| "layer_norm": "standard", |
| "num_shared_experts": 0, |
| "add_qkvbias": true, |
| "attn_scaling": null, |
| "shared_layer_norm": false, |
| "tgt_word_vec_size": 1280, |
| "transformer_ff": 5120, |
| "first_k_dense_replace": 0, |
| "heads": 20, |
| "head_dim": null, |
| "hidden_size": 1280, |
| "position_encoding_type": "Learned", |
| "mlp_activation_fn": "gelu", |
| "add_ffnbias": true, |
| "sliding_window": 4096, |
| "layers": 32, |
| "add_key_bias": false, |
| "n_positions": 448, |
| "norm_eps": 1e-05, |
| "parallel_residual": false, |
| "num_experts": 0, |
| "moe_transformer_ff": null, |
| "add_final_linear_bias": true, |
| "num_experts_per_tok": 0 |
| }, |
| "embeddings": { |
| "src_word_vec_size": 1280, |
| "position_encoding_type": "Learned", |
| "tgt_word_vec_size": 1280, |
| "n_positions": 448 |
| }, |
| "encoder": { |
| "heads_kv": 20, |
| "src_word_vec_size": 1280, |
| "layer_norm": "standard", |
| "num_shared_experts": 0, |
| "add_qkvbias": true, |
| "encoder_type": "whisper", |
| "attn_scaling": null, |
| "shared_layer_norm": false, |
| "transformer_ff": 5120, |
| "heads": 20, |
| "first_k_dense_replace": 0, |
| "head_dim": null, |
| "hidden_size": 1280, |
| "position_encoding_type": null, |
| "num_mel_bins": 128, |
| "mlp_activation_fn": "gelu", |
| "add_ffnbias": true, |
| "sliding_window": 4096, |
| "layers": 32, |
| "add_key_bias": false, |
| "norm_eps": 1e-05, |
| "parallel_residual": false, |
| "num_experts": 0, |
| "moe_transformer_ff": null, |
| "max_source_positions": 1500, |
| "add_final_linear_bias": true, |
| "num_experts_per_tok": 0 |
| } |
| }, |
| "transforms_configs": { |
| "huggingface_tokenize": { |
| "path": "/scratch/models/whisper-large-v3-eole/tokenizer.json", |
| "huggingface_model": "openai/whisper-large-v3" |
| } |
| }, |
| "inference": { |
| "optional_eos": [], |
| "max_length": 448, |
| "chat_template": null |
| } |
| } |