{
  "architectures": [
    "EcapaTdnnSpeakerEncoder"
  ],
  "auto_map": {
    "AutoConfig": "configuration_ecapa_tdnn.EcapaTdnnSpeakerEncoderConfig",
    "AutoModel": "modeling_ecapa_tdnn.EcapaTdnnSpeakerEncoder",
    "AutoModelForFeatureExtraction": "modeling_ecapa_tdnn.EcapaTdnnSpeakerEncoder",
    "AutoFeatureExtractor": "feature_extraction_ecapa_tdnn.EcapaTdnnFeatureExtractor",
    "AutoTokenizer": "tokenizer_ecapa_tdnn.EcapaTdnnDummyTokenizer"
  },
  "model_type": "ecapa_tdnn_speaker_encoder",
  "mel_dim": 128,
  "enc_dim": 2048,
  "enc_channels": [
    512,
    512,
    512,
    512,
    1536
  ],
  "enc_kernel_sizes": [
    5,
    3,
    3,
    3,
    1
  ],
  "enc_dilations": [
    1,
    2,
    3,
    4,
    1
  ],
  "enc_attention_channels": 128,
  "enc_res2net_scale": 8,
  "enc_se_channels": 128,
  "sample_rate": 24000,
  "pipeline_tag": "feature-extraction",
  "torch_dtype": "float32",
  "feature_extractor_type": "EcapaTdnnFeatureExtractor"
}