| | from transformers.configuration_utils import PretrainedConfig |
| |
|
| |
|
class XvectorConfig(PretrainedConfig):
    """Configuration for the ``xvector`` model type.

    Each constructor argument is stored as an attribute of the same name;
    any additional keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.

    The list- and dict-valued arguments default to ``None`` and are replaced
    with freshly built default objects on every call, so two configs created
    with defaults never share (and cannot accidentally mutate) the same
    list/dict. Explicitly passed values are stored as-is.

    NOTE(review): the parameter meanings below are inferred from names and
    default values only -- confirm against the modeling code.

    Args:
        n_mels: Presumably the number of mel filterbank features.
        sample_rate: Presumably the audio sample rate in Hz.
        win_length: Presumably the analysis window length (units not
            visible here -- TODO confirm).
        hop_length: Presumably the hop between windows (units not visible
            here -- TODO confirm).
        mean_norm: Flag stored for the input normalization step.
        std_norm: Flag stored for the input normalization step.
        norm_type: Normalization mode string (default ``'sentence'``).
        tdnn_blocks: Presumably the number of TDNN blocks; expected to match
            the length of the three ``tdnn_*`` lists -- TODO confirm.
        tdnn_channels: Per-block channel sizes. ``None`` selects
            ``[512, 512, 512, 512, 1500]``.
        tdnn_kernel_sizes: Per-block kernel sizes. ``None`` selects
            ``[5, 3, 3, 1, 1]``.
        tdnn_dilations: Per-block dilations. ``None`` selects
            ``[1, 2, 3, 1, 1]``.
        hidden_size: Presumably the embedding dimension.
        num_classes: Presumably the number of classification targets.
        loss_fn: Loss identifier string (default ``'aam'``).
        auto_map: Mapping from ``Auto*`` class names to the custom
            implementation paths. ``None`` selects the default mapping for
            this model's configuration/modeling modules.
        initializer_range: Value stored for weight initialization.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = 'xvector'

    def __init__(
        self,
        n_mels=40,
        sample_rate=16000,
        win_length=25,
        hop_length=10,
        mean_norm=True,
        std_norm=False,
        norm_type='sentence',
        tdnn_blocks=5,
        tdnn_channels=None,
        tdnn_kernel_sizes=None,
        tdnn_dilations=None,
        hidden_size=512,
        num_classes=1251,
        loss_fn='aam',
        auto_map=None,
        initializer_range=0.02,
        **kwargs
    ):
        # Build mutable defaults per call instead of in the signature, so
        # instances never alias one shared list/dict.
        if tdnn_channels is None:
            tdnn_channels = [512, 512, 512, 512, 1500]
        if tdnn_kernel_sizes is None:
            tdnn_kernel_sizes = [5, 3, 3, 1, 1]
        if tdnn_dilations is None:
            tdnn_dilations = [1, 2, 3, 1, 1]
        if auto_map is None:
            auto_map = {
                "AutoConfig": "configuration_xvector.XvectorConfig",
                "AutoModel": "modeling_xvector.XvectorModel",
                "AutoModelForAudioClassification": "modeling_xvector.XvectorModelForSequenceClassification",
            }

        # Feature-extraction settings.
        self.n_mels = n_mels
        self.sample_rate = sample_rate
        self.win_length = win_length
        self.hop_length = hop_length

        # Input normalization settings.
        self.mean_norm = mean_norm
        self.std_norm = std_norm
        self.norm_type = norm_type

        # TDNN encoder settings.
        self.tdnn_blocks = tdnn_blocks
        self.tdnn_channels = tdnn_channels
        self.tdnn_kernel_sizes = tdnn_kernel_sizes
        self.tdnn_dilations = tdnn_dilations
        self.hidden_size = hidden_size

        # Classification head / loss settings.
        self.num_classes = num_classes
        self.loss_fn = loss_fn

        # Auto-class registration and weight initialization.
        self.auto_map = auto_map
        self.initializer_range = initializer_range

        # Let the base class consume the remaining generic options.
        super().__init__(**kwargs)
| |
|