DragonLineageAI
/

Vi-SparkTTS-0.5B

Model card · Files and versions

ancv committed on Apr 5, 2025

Commit

94051fb

·

verified ·

1 Parent(s): 5491367

Delete config.json

Files changed (1) hide show

config.json +0 -90

config.json DELETED Viewed

@@ -1,90 +0,0 @@
-{
-    "model_type": "spark-tts",
-    "architectures": [
-      "SparkTTSModel"
-    ],
-    "auto_map": {
-      "AutoConfig": "configuration_spark_tts.SparkTTSConfig",
-      "AutoModel": "modeling_spark_tts.SparkTTSModel",
-      "AutoProcessor": "processing_spark_tts.SparkTTSProcessor"
-    },
-    "processor_class": "processing_spark_tts.SparkTTSProcessor",
-    "custom_pipelines": {
-        "text-to-speech": {
-            "impl": "pipeline_spark_tts.SparkTTSPipeline",
-            "pt": ["AutoModel"]
-        }
-    },
-    "llm_model_name_or_path": "./LLM",
-    "bicodec_model_name_or_path": "./BiCodec",
-    "wav2vec2_model_name_or_path": "./wav2vec2-large-xlsr-53",
-    "sample_rate": 16000,
-    "highpass_cutoff_freq": 40,
-    "latent_hop_length": 320,
-    "ref_segment_duration": 6.0,
-    "volume_normalize": true,
-    "bicodec_config": {
-      "audio_tokenizer": {
-        "mel_params": {
-        "sample_rate": 16000,
-        "n_fft": 1024,
-        "win_length": 640,
-        "hop_length": 320,
-        "mel_fmin": 10,
-        "mel_fmax": null,
-        "num_mels": 128
-        },
-        "encoder": {
-        "input_channels": 1024,
-        "vocos_dim": 384,
-        "vocos_intermediate_dim": 2048,
-        "vocos_num_layers": 12,
-        "out_channels": 1024,
-        "sample_ratios": [1, 1]
-        },
-        "decoder": {
-        "input_channel": 1024,
-        "channels": 1536,
-        "rates": [8, 5, 4, 2],
-        "kernel_sizes": [16, 11, 8, 4]
-        },
-        "quantizer": {
-        "input_dim": 1024,
-        "codebook_size": 8192,
-        "codebook_dim": 8,
-        "commitment": 0.25,
-        "codebook_loss_weight": 2.0,
-        "use_l2_normlize": true,
-        "threshold_ema_dead_code": 0.2
-        },
-        "speaker_encoder": {
-        "input_dim": 128,
-        "out_dim": 1024,
-        "latent_dim": 128,
-        "token_num": 32,
-        "fsq_levels": [4, 4, 4, 4, 4, 4],
-        "fsq_num_quantizers": 1
-        },
-        "prenet": {
-        "input_channels": 1024,
-        "vocos_dim": 384,
-        "vocos_intermediate_dim": 2048,
-        "vocos_num_layers": 12,
-        "out_channels": 1024,
-        "condition_dim": 1024,
-        "sample_ratios": [1, 1],
-        "use_tanh_at_final": false
-        },
-        "postnet": {
-        "input_channels": 1024,
-        "vocos_dim": 384,
-        "vocos_intermediate_dim": 2048,
-        "vocos_num_layers": 6,
-        "out_channels": 1024,
-        "use_tanh_at_final": false
-        }
-    }
-    },
-    "torch_dtype": "bfloat16",
-    "transformers_version": "4.43.1"
-  }