{
  "sample_rate": 24000,
  "backbone_class_path": "exp.models.ComVo",
  "backbone_init_args": {
    "input_channels": 100,
    "dim": 1536,
    "intermediate_dim": 4608,
    "num_layers": 8,
    "n_quantization": 128,
    "layer_scale_init_value": null,
    "adanorm_num_embeddings": null,
    "rank": null
  },
  "head_class_path": "exp.heads.ISTFTHead",
  "head_init_args": {
    "dim": 1536,
    "n_fft": 1024,
    "hop_length": 256,
    "padding": "center"
  },
  "feature_extractor_class_path": "exp.feature_extractors.MelSpectrogramFeatures",
  "feature_extractor_init_args": {
    "sample_rate": 24000,
    "n_fft": 1024,
    "hop_length": 256,
    "n_mels": 100,
    "padding": "center"
  }
}