| from transformers.configuration_utils import PretrainedConfig |
|
|
|
|
class EcapaConfig(PretrainedConfig):
    """Configuration for an ECAPA-TDNN speaker-embedding model.

    Stores the feature-extraction, architecture, and training-head
    hyperparameters consumed by the matching ``EcapaModel`` (see
    ``auto_map``). Extra keyword arguments are forwarded to
    ``PretrainedConfig``.
    """

    # Registered model identifier used by the transformers Auto* machinery.
    model_type = 'ecapa'

    def __init__(
        self,
        n_mels=80,
        sample_rate=16000,
        win_length=25,
        hop_length=10,
        mean_norm=True,
        std_norm=False,
        norm_type='sentence',
        hidden_size=192,
        channels=None,
        kernel_sizes=None,
        dilations=None,
        attention_channels=128,
        res2net_scale=8,
        se_channels=128,
        global_context=True,
        groups=None,
        num_classes=1251,
        loss_fn='aam',
        auto_map=None,
        initializer_range=0.02,
        **kwargs
    ):
        """Build the configuration.

        Args:
            n_mels: Number of mel filterbank channels in the input features.
            sample_rate: Expected audio sample rate in Hz.
            win_length: Analysis window length (presumably milliseconds, given
                the 25/10 pairing with ``hop_length`` — confirm against the
                feature extractor).
            hop_length: Frame shift (same unit as ``win_length``).
            mean_norm: Whether to apply mean normalization to features.
            std_norm: Whether to apply standard-deviation normalization.
            norm_type: Normalization scope (default ``'sentence'``).
            hidden_size: Dimensionality of the output speaker embedding.
            channels: Per-block channel counts of the TDNN layers.
                Defaults to ``[512, 512, 512, 512, 1536]``.
            kernel_sizes: Per-block convolution kernel sizes.
                Defaults to ``[5, 3, 3, 3, 1]``.
            dilations: Per-block convolution dilations.
                Defaults to ``[1, 2, 3, 4, 1]``.
            attention_channels: Channels in the attentive-pooling layer.
            res2net_scale: Res2Net scale factor inside the SE-Res2Net blocks.
            se_channels: Bottleneck channels of the squeeze-excitation blocks.
            global_context: Whether attentive pooling uses global context.
            groups: Per-block convolution group counts.
                Defaults to ``[1, 1, 1, 1, 1]``.
            num_classes: Number of speaker classes for the training head.
            loss_fn: Training loss identifier (default ``'aam'``,
                additive angular margin).
            auto_map: Auto-class mapping for trust_remote_code loading.
                Defaults to the bundled configuration/model modules.
            initializer_range: Stddev for weight initialization.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        # Feature-extraction parameters.
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        self.win_length = win_length
        self.hop_length = hop_length

        # Feature-normalization parameters.
        self.mean_norm = mean_norm
        self.std_norm = std_norm
        self.norm_type = norm_type

        # Architecture parameters. List/dict defaults are created per call
        # (not as shared mutable defaults) so mutating one config instance
        # can never corrupt the defaults of another.
        self.channels = [512, 512, 512, 512, 1536] if channels is None else channels
        self.kernel_sizes = [5, 3, 3, 3, 1] if kernel_sizes is None else kernel_sizes
        self.attention_channels = attention_channels
        self.dilations = [1, 2, 3, 4, 1] if dilations is None else dilations
        self.res2net_scale = res2net_scale
        self.se_channels = se_channels
        self.global_context = global_context
        self.groups = [1, 1, 1, 1, 1] if groups is None else groups
        self.hidden_size = hidden_size

        # Classification-head / training parameters.
        self.num_classes = num_classes
        self.loss_fn = loss_fn

        # transformers integration.
        self.auto_map = auto_map if auto_map is not None else {
            "AutoConfig": "configuration_ecapa.EcapaConfig",
            "AutoModel": "modeling_ecapa.EcapaModel",
        }
        self.initializer_range = initializer_range

        super().__init__(**kwargs)