from transformers import PretrainedConfig


class SoundStreamConfig(PretrainedConfig):
    """Configuration container for a ``soundstream`` model.

    Stores four integer hyperparameters as instance attributes. Any extra
    keyword arguments are forwarded untouched to ``PretrainedConfig``.

    Args:
        channels: Stored as ``self.channels``. Defaults to 32.
            NOTE(review): presumably the base channel width of the
            network -- confirm against the model code.
        latent_dim: Stored as ``self.latent_dim``. Defaults to 512.
            NOTE(review): presumably the size of the latent embedding --
            confirm against the model code.
        codebook_size: Stored as ``self.codebook_size``. Defaults to 1024.
            NOTE(review): presumably the number of entries per quantizer
            codebook -- confirm against the model code.
        num_quantizers: Stored as ``self.num_quantizers``. Defaults to 8.
            NOTE(review): presumably the number of quantizer stages --
            confirm against the model code.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    # Identifier string exposed on the class as its ``model_type``.
    model_type = "soundstream"

    def __init__(
        self,
        channels: int = 32,
        latent_dim: int = 512,
        codebook_size: int = 1024,
        num_quantizers: int = 8,
        **kwargs,
    ):
        # The base class is initialised first, with only the extra kwargs;
        # the model-specific fields are attached afterwards.
        super().__init__(**kwargs)

        self.channels = channels
        self.latent_dim = latent_dim
        self.codebook_size = codebook_size
        self.num_quantizers = num_quantizers