{ "architectures": [ "Mapperatorinator" ], "backbone_config": { "_attn_implementation_autoset": true, "_name_or_path": "openai/whisper-base", "architectures": [ "WhisperForConditionalGeneration" ], "begin_suppress_tokens": null, "bos_token_id": 1, "d_model": 512, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layers": 6, "decoder_start_token_id": 1, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layers": 6, "eos_token_id": 2, "forced_decoder_ids": null, "max_length": 448, "max_source_positions": 512, "max_target_positions": 2048, "model_type": "whisper", "num_hidden_layers": 6, "num_mel_bins": 512, "pad_token_id": 0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": false, "vocab_size": 3988 }, "backbone_model_name": "openai/whisper-base", "bos_token_id": 1, "decoder_start_token_id": 1, "do_sample": true, "do_style_embed": false, "embed_decoder_input": true, "eos_token_id": 2, "hidden_size": 512, "hop_length": 128, "init_std": 0.02, "input_features": true, "is_encoder_decoder": true, "max_length": 2048, "max_source_positions": 512, "max_target_positions": 2048, "model_type": "mapperatorinator", "n_fft": 1024, "n_mels": 388, "num_attention_heads": 8, "num_classes": 0, "num_hidden_layers": 6, "pad_token_id": 0, "rhythm_token_end": 836, "rhythm_token_start": 17, "rhythm_weight": 3.0, "sample_rate": 16000, "top_k": 0, "torch_dtype": "float32", "transformers_version": "4.46.0", "vocab_size": 3988, "vocab_size_in": 9920 }