| { | |
| "transforms": [ | |
| "huggingface_tokenize" | |
| ], | |
| "src_vocab_size": 51865, | |
| "data": null, | |
| "tgt_vocab_size": 51865, | |
| "decoder_start_token": "<|startoftranscript|>", | |
| "src_vocab": null, | |
| "n_sample": 0, | |
| "pad_token": "<|endoftext|>", | |
| "vocab_size_multiple": 8, | |
| "save_data": null, | |
| "bos_token": "<|endoftext|>", | |
| "tgt_vocab": null, | |
| "skip_empty_level": "silent", | |
| "eos_token": "<|endoftext|>", | |
| "unk_token": "<|endoftext|>", | |
| "share_vocab": true, | |
| "training": { | |
| "normalization": "tokens", | |
| "batch_type": "tokens", | |
| "accum_steps": [ | |
| 0 | |
| ], | |
| "valid_batch_size": 256, | |
| "quant_type": "", | |
| "batch_size_multiple": 1, | |
| "group_size": 0, | |
| "quant_layers": [], | |
| "w_bit": 0, | |
| "batch_size": 896, | |
| "compute_dtype": "torch.float32", | |
| "accum_count": [ | |
| 32 | |
| ] | |
| }, | |
| "model": { | |
| "sliding_window": 4096, | |
| "no_timestamps_token_id": 50363, | |
| "first_k_dense_replace": 0, | |
| "attn_scaling": null, | |
| "layer_norm": "standard", | |
| "layers": 12, | |
| "num_experts": 0, | |
| "adapter_bias": false, | |
| "num_shared_experts": 0, | |
| "add_final_linear_bias": true, | |
| "share_decoder_embeddings": true, | |
| "left_pad": false, | |
| "num_experts_per_tok": 0, | |
| "transformer_ff": 3072, | |
| "architecture": "whisper", | |
| "suppress_tokens": [ | |
| 1, | |
| 2, | |
| 7, | |
| 8, | |
| 9, | |
| 10, | |
| 14, | |
| 25, | |
| 26, | |
| 27, | |
| 28, | |
| 29, | |
| 31, | |
| 58, | |
| 59, | |
| 60, | |
| 61, | |
| 62, | |
| 63, | |
| 90, | |
| 91, | |
| 92, | |
| 93, | |
| 359, | |
| 503, | |
| 522, | |
| 542, | |
| 873, | |
| 893, | |
| 902, | |
| 918, | |
| 922, | |
| 931, | |
| 1350, | |
| 1853, | |
| 1982, | |
| 2460, | |
| 2627, | |
| 3246, | |
| 3253, | |
| 3268, | |
| 3536, | |
| 3846, | |
| 3961, | |
| 4183, | |
| 4667, | |
| 6585, | |
| 6647, | |
| 7273, | |
| 9061, | |
| 9383, | |
| 10428, | |
| 10929, | |
| 11938, | |
| 12033, | |
| 12331, | |
| 12562, | |
| 13793, | |
| 14157, | |
| 14635, | |
| 15265, | |
| 15618, | |
| 16553, | |
| 16604, | |
| 18362, | |
| 18956, | |
| 20075, | |
| 21675, | |
| 22520, | |
| 26130, | |
| 26161, | |
| 26435, | |
| 28279, | |
| 29464, | |
| 31650, | |
| 32302, | |
| 32470, | |
| 36865, | |
| 42863, | |
| 47425, | |
| 49870, | |
| 50254, | |
| 50258, | |
| 50358, | |
| 50359, | |
| 50360, | |
| 50361, | |
| 50362 | |
| ], | |
| "parallel_residual": false, | |
| "position_encoding_type": "Learned", | |
| "add_ffnbias": true, | |
| "shared_layer_norm": false, | |
| "norm_eps": 1e-05, | |
| "heads": 12, | |
| "heads_kv": 12, | |
| "huggingface_model": "openai/whisper-small", | |
| "mlp_activation_fn": "gelu", | |
| "hidden_size": 768, | |
| "add_key_bias": false, | |
| "word_timestamp_heads": [ | |
| [ | |
| 5, | |
| 3 | |
| ], | |
| [ | |
| 5, | |
| 9 | |
| ], | |
| [ | |
| 8, | |
| 0 | |
| ], | |
| [ | |
| 8, | |
| 4 | |
| ], | |
| [ | |
| 8, | |
| 7 | |
| ], | |
| [ | |
| 8, | |
| 8 | |
| ], | |
| [ | |
| 9, | |
| 0 | |
| ], | |
| [ | |
| 9, | |
| 7 | |
| ], | |
| [ | |
| 9, | |
| 9 | |
| ], | |
| [ | |
| 10, | |
| 5 | |
| ] | |
| ], | |
| "add_qkvbias": true, | |
| "moe_transformer_ff": null, | |
| "head_dim": null, | |
| "begin_suppress_tokens": [ | |
| 220, | |
| 50257 | |
| ], | |
| "generator_bias": false, | |
| "encoder": { | |
| "src_word_vec_size": 768, | |
| "norm_eps": 1e-05, | |
| "sliding_window": 4096, | |
| "heads": 12, | |
| "heads_kv": 12, | |
| "max_source_positions": 1500, | |
| "encoder_type": "whisper", | |
| "first_k_dense_replace": 0, | |
| "mlp_activation_fn": "gelu", | |
| "hidden_size": 768, | |
| "attn_scaling": null, | |
| "add_key_bias": false, | |
| "add_qkvbias": true, | |
| "layer_norm": "standard", | |
| "layers": 12, | |
| "num_mel_bins": 80, | |
| "moe_transformer_ff": null, | |
| "head_dim": null, | |
| "num_experts": 0, | |
| "num_shared_experts": 0, | |
| "add_final_linear_bias": true, | |
| "num_experts_per_tok": 0, | |
| "transformer_ff": 3072, | |
| "position_encoding_type": null, | |
| "parallel_residual": false, | |
| "add_ffnbias": true, | |
| "shared_layer_norm": false | |
| }, | |
| "embeddings": { | |
| "src_word_vec_size": 768, | |
| "tgt_word_vec_size": 768, | |
| "n_positions": 448, | |
| "position_encoding_type": "Learned" | |
| }, | |
| "decoder": { | |
| "norm_eps": 1e-05, | |
| "n_positions": 448, | |
| "sliding_window": 4096, | |
| "heads": 12, | |
| "heads_kv": 12, | |
| "first_k_dense_replace": 0, | |
| "mlp_activation_fn": "gelu", | |
| "tgt_word_vec_size": 768, | |
| "hidden_size": 768, | |
| "attn_scaling": null, | |
| "decoder_type": "transformer", | |
| "add_key_bias": false, | |
| "add_qkvbias": true, | |
| "layers": 12, | |
| "layer_norm": "standard", | |
| "moe_transformer_ff": null, | |
| "head_dim": null, | |
| "num_experts": 0, | |
| "num_shared_experts": 0, | |
| "add_final_linear_bias": true, | |
| "num_experts_per_tok": 0, | |
| "transformer_ff": 3072, | |
| "position_encoding_type": "Learned", | |
| "parallel_residual": false, | |
| "add_ffnbias": true, | |
| "shared_layer_norm": false | |
| } | |
| }, | |
| "transforms_configs": { | |
| "huggingface_tokenize": { | |
| "huggingface_model": "openai/whisper-small", | |
| "path": "/scratch/models/whisper-small-eole/tokenizer.json" | |
| } | |
| }, | |
| "inference": { | |
| "optional_eos": [], | |
| "max_length": 448, | |
| "chat_template": null | |
| } | |
| } |