from transformers import PretrainedConfig
class WavCochConfig(PretrainedConfig):
    """Configuration for a WavCoch model (a Hugging Face ``PretrainedConfig``).

    Holds the hyperparameters for an encoder/decoder pair with a quantizer
    in between: framing parameters (``window_size``, ``hop_length``),
    symmetric-looking conv stacks (``*_layers``, ``*_dim``,
    ``*_kernel_size``), quantizer loss weights, and codebook/vocab sizes.

    NOTE(review): the semantics below are inferred from parameter names —
    confirm against the model implementation.

    Args:
        window_size: Analysis window length in samples — presumably for the
            waveform front-end; verify against the model code.
        hop_length: Frame hop in samples between successive windows.
        encoder_layers: Number of layers in the encoder stack.
        encoder_dim: Hidden dimension of the encoder.
        encoder_kernel_size: Kernel size used by the encoder layers.
        decoder_layers: Number of layers in the decoder stack.
        decoder_dim: Hidden dimension of the decoder.
        decoder_kernel_size: Kernel size used by the decoder layers.
        entropy_loss_weight: Weight of the quantizer entropy loss term.
        commit_loss_weight: Weight of the quantizer commitment loss term.
        diversity_gamma: Diversity coefficient — presumably for an
            LFQ-style quantizer; confirm in the quantizer code.
        codebook_size: Number of entries in the quantizer codebook.
        vocab_size: Token vocabulary size (defaults to ``codebook_size``).
        **kwargs: Forwarded to ``PretrainedConfig.__init__`` (e.g.
            ``return_dict``, ``torch_dtype``, and other common HF options).
    """

    model_type = "WavCoch.WavCoch"

    def __init__(
        self,
        window_size=1001,
        hop_length=80,
        encoder_layers=8,
        encoder_dim=512,
        encoder_kernel_size=3,
        decoder_layers=8,
        decoder_dim=512,
        decoder_kernel_size=9,
        entropy_loss_weight=0.001,
        commit_loss_weight=0.001,
        diversity_gamma=1.0,
        codebook_size=8192,
        vocab_size=8192,
        **kwargs,
    ):
        # Waveform framing parameters.
        self.window_size = window_size
        self.hop_length = hop_length
        # Encoder stack.
        self.encoder_layers = encoder_layers
        self.encoder_dim = encoder_dim
        self.encoder_kernel_size = encoder_kernel_size
        # Decoder stack.
        self.decoder_layers = decoder_layers
        self.decoder_dim = decoder_dim
        self.decoder_kernel_size = decoder_kernel_size
        # Quantizer loss weights.
        self.entropy_loss_weight = entropy_loss_weight
        self.commit_loss_weight = commit_loss_weight
        self.diversity_gamma = diversity_gamma
        # Codebook / vocabulary sizes.
        self.codebook_size = codebook_size
        self.vocab_size = vocab_size
        # Call the HF base-class initializer last so the attributes set
        # above are in place before any base-class bookkeeping runs.
        super().__init__(**kwargs)