Spaces:
Sleeping
Sleeping
| from transformers.configuration_utils import PretrainedConfig | |
| from typing import List | |
| class HeartCodecConfig(PretrainedConfig): | |
| model_type = "heartcodec" | |
| def __init__( | |
| self, | |
| # config for rvq | |
| dim: int = 512, | |
| codebook_size: int = 8192, | |
| decay: float = 0.9, | |
| commitment_weight: float = 1.0, | |
| threshold_ema_dead_code: int = 2, | |
| use_cosine_sim: bool = False, | |
| codebook_dim: int = 32, | |
| num_quantizers: int = 8, | |
| # config for diffusion transformer | |
| attention_head_dim: int = 64, | |
| in_channels: int = 1024, | |
| norm_type: str = "ada_norm_single", | |
| num_attention_heads: int = 24, | |
| num_layers: int = 24, | |
| num_layers_2: int = 6, | |
| out_channels: int = 256, | |
| # config for sq codec | |
| num_bands: int = 1, | |
| sample_rate: int = 48000, | |
| causal: bool = True, | |
| num_samples: int = 2, | |
| downsample_factors: List[int] = [3, 4, 4, 4, 5], | |
| downsample_kernel_sizes: List[int] = [6, 8, 8, 8, 10], | |
| upsample_factors: List[int] = [5, 4, 4, 4, 3], | |
| upsample_kernel_sizes: List[int] = [10, 8, 8, 8, 6], | |
| latent_hidden_dim: int = 128, | |
| default_kernel_size: int = 7, | |
| delay_kernel_size: int = 5, | |
| init_channel: int = 64, | |
| res_kernel_size: int = 7, | |
| **kwargs | |
| ): | |
| super().__init__(**kwargs) | |
| self.dim = dim | |
| self.codebook_size = codebook_size | |
| self.decay = decay | |
| self.commitment_weight = commitment_weight | |
| self.threshold_ema_dead_code = threshold_ema_dead_code | |
| self.use_cosine_sim = use_cosine_sim | |
| self.codebook_dim = codebook_dim | |
| self.num_quantizers = num_quantizers | |
| self.attention_head_dim = attention_head_dim | |
| self.in_channels = in_channels | |
| self.norm_type = norm_type | |
| self.num_attention_heads = num_attention_heads | |
| self.num_layers = num_layers | |
| self.num_layers_2 = num_layers_2 | |
| self.out_channels = out_channels | |
| self.num_bands = num_bands | |
| self.sample_rate = sample_rate | |
| self.causal = causal | |
| self.num_samples = num_samples | |
| self.downsample_factors = downsample_factors | |
| self.downsample_kernel_sizes = downsample_kernel_sizes | |
| self.upsample_factors = upsample_factors | |
| self.upsample_kernel_sizes = upsample_kernel_sizes | |
| self.latent_hidden_dim = latent_hidden_dim | |
| self.default_kernel_size = default_kernel_size | |
| self.delay_kernel_size = delay_kernel_size | |
| self.init_channel = init_channel | |
| self.res_kernel_size = res_kernel_size | |