{ "architectures": [ "Mapperatorinator" ], "backbone_config": { "_attn_implementation_autoset": true, "_name_or_path": "openai/whisper-tiny", "architectures": [ "WhisperForConditionalGeneration" ], "begin_suppress_tokens": null, "bos_token_id": 1, "d_model": 384, "decoder_attention_heads": 6, "decoder_ffn_dim": 1536, "decoder_layers": 4, "decoder_start_token_id": 1, "encoder_attention_heads": 6, "encoder_ffn_dim": 1536, "encoder_layers": 4, "eos_token_id": 2, "forced_decoder_ids": null, "max_length": 448, "max_source_positions": 1024, "max_target_positions": 2560, "model_type": "whisper", "num_hidden_layers": 4, "num_mel_bins": 464, "pad_token_id": 0, "tie_word_embeddings": false, "torch_dtype": "float32", "use_cache": false, "vocab_size": 2968 }, "backbone_model_name": "openai/whisper-tiny", "bos_token_id": 1, "cond_dim": 128, "cond_embeds": 3, "decoder_start_token_id": 1, "disable_compile": true, "do_sample": true, "do_style_embed": false, "embed_decoder_input": true, "eos_token_id": 2, "hidden_size": 384, "hop_length": 128, "init_std": 0.02, "input_features": true, "is_encoder_decoder": true, "max_length": 2048, "max_source_positions": 1024, "max_target_positions": 2560, "model_type": "mapperatorinator", "n_fft": 1024, "n_mels": 80, "num_attention_heads": 6, "num_classes": 80742, "num_hidden_layers": 4, "num_mappers": 3731, "pad_token_id": 0, "rhythm_token_end": 1641, "rhythm_token_start": 3, "rhythm_weight": 1.0, "sample_rate": 16000, "top_k": 0, "torch_dtype": "float32", "transformers_version": "4.47.0", "vocab_size": 2968, "vocab_size_in": 6700 }