Upload VoicePostprocessModel DACVAE checkpoints
Browse files
VoicePostprocessModel/README.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# VoicePostprocessModel Checkpoints
|
| 2 |
+
|
| 3 |
+
Trained checkpoints for the DACVAE/PCA recipe in VoicePostprocessModel.
|
| 4 |
+
|
| 5 |
+
Files:
|
| 6 |
+
- `dacvae_estimator_best.pt`: differentiable acoustic-state estimator over Echo-TTS Fish/DAC latents projected through the Echo-TTS PCA to 80 dimensions.
|
| 7 |
+
- `dacvae_editor_general_best.pt`: broad dry-recovery editor trained on room, EQ, distortion, compression, bandwidth, and noise variants.
|
| 8 |
+
- `dacvae_editor_target_reverb_best.pt`: target-only editor trained on the calibrated room/bathroom reverb suite.
|
| 9 |
+
- `dacvae_editor_mixed_corrective_best.pt`: mixed corrective editor fine-tuned on the calibrated room/bathroom reverb suite plus the broad cache.
|
| 10 |
+
|
| 11 |
+
Latent-space dependencies (external assets, not included in this upload):
|
| 12 |
+
- Fish/DAC checkpoint: https://huggingface.co/jordand/fish-s1-dac-min
|
| 13 |
+
- Echo-TTS PCA state: https://huggingface.co/jordand/echo-tts-base/blob/main/pca_state.safetensors
|
VoicePostprocessModel/dacvae_editor_general_best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e687a8fd7558a2e1888205bb9bb27e4de967475094a1b176b4ce222b6dc01bf9
|
| 3 |
+
size 21545193
|
VoicePostprocessModel/dacvae_editor_mixed_corrective_best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f0d7fd9a2a5807908e598dfc3a6d4b4911e3d775882d8bb585663483e3c6912
|
| 3 |
+
size 21545193
|
VoicePostprocessModel/dacvae_editor_target_reverb_best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54a91e5a4e2e193422a60c278eaff821b2755268ee70acf6be9cd259bb28bd9e
|
| 3 |
+
size 21545193
|
VoicePostprocessModel/manifest.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"project": "VoicePostprocessModel",
|
| 3 |
+
"latent_space": "Echo-TTS Fish/DAC z_q projected through Echo-TTS PCA to 80 dimensions",
|
| 4 |
+
"sample_rate": 44100,
|
| 5 |
+
"files": {
|
| 6 |
+
"estimator": "dacvae_estimator_best.pt",
|
| 7 |
+
"editor_general": "dacvae_editor_general_best.pt",
|
| 8 |
+
"editor_target_reverb": "dacvae_editor_target_reverb_best.pt",
|
| 9 |
+
"editor_mixed_corrective": "dacvae_editor_mixed_corrective_best.pt"
|
| 10 |
+
},
|
| 11 |
+
"external_latent_space_assets": {
|
| 12 |
+
"fish_dac": "https://huggingface.co/jordand/fish-s1-dac-min",
|
| 13 |
+
"echo_tts_pca_state": "https://huggingface.co/jordand/echo-tts-base/blob/main/pca_state.safetensors",
|
| 14 |
+
"echo_tts_repo": "https://github.com/jordandarefsky/echo-tts"
|
| 15 |
+
}
|
| 16 |
+
}
|