Spaces:
Runtime error
Runtime error
File size: 6,669 Bytes
#!/usr/bin/env python3
"""Generate voice clone samples from ALL quantized Fish Speech S2 Pro variants."""
import os, sys, json, time, gc, traceback, subprocess
import torch
# Process-wide settings: disable tokenizers parallelism and point the
# Hugging Face cache (HF_HOME) at a directory under /tmp.
os.environ.update({
    "TOKENIZERS_PARALLELISM": "false",
    "HF_HOME": "/tmp/hf_cache",
})

# Make /app/fish-speech importable ahead of every other sys.path entry.
sys.path.insert(0, "/app/fish-speech")

# Sentence every model variant is asked to speak.
GEN_TEXT = (
    "Every man's life ends the same way. "
    "It is only the details of how he lived "
    "that distinguish one man from another."
)

# Transcript passed alongside REF_AUDIO as the prompt text.
REF_TEXT = (
    "Let me get this straight. "
    "You think that your client, one of the wealthiest most powerful men "
    "in the world, is secretly a vigilante who spends his nights beating "
    "criminals to a pulp with his bare hands. "
    "And your plan is to blackmail this person."
)

# Directory that receives the generated .wav files (created on import).
OUT = "/tmp/samples"
# Reference recording used as the voice prompt.
REF_AUDIO = "/app/reference/morgan_ref.wav"

os.makedirs(OUT, exist_ok=True)
# === PART 1: Python-based models (bf16, fp8, gptq) ===
# Each entry is (variant label, Hugging Face repo id). The label is reused
# for the local checkpoint directory and the output file name.
PYTHON_MODELS = [
    ("baseline_bf16", "fishaudio/s2-pro"),
    ("fp8", "drbaph/s2-pro-fp8"),
    ("gptq_w4a16", "baicai1145/s2-pro-w4a16"),
]
def gen_python_models():
    """Generate one voice-clone sample per entry in PYTHON_MODELS.

    For each (name, repo id) pair:

    1. Download the checkpoint into /tmp/models/<name> unless a config.json
       is already present there.
    2. Run ``python -m fish_speech.models.text2semantic.inference`` in a
       subprocess (PYTHONPATH=/app/fish-speech, 600 s timeout) with GEN_TEXT
       as the text and REF_AUDIO / REF_TEXT as the prompt.
    3. Report the duration of ``{OUT}/fish_<name>_morgan_clone.wav`` if the
       run produced it, otherwise the tail of the subprocess stderr.

    A download error or a timeout for one variant is reported and the loop
    continues with the next variant instead of aborting the whole run.

    Returns:
        None. Results are printed and written to disk.
    """
    print("\n" + "=" * 60)
    print(" PART 1: Python-based models (bf16, fp8, gptq)")
    print("=" * 60)

    for name, model_id in PYTHON_MODELS:
        print(f"\n [{name}] ({model_id})")

        local_dir = f"/tmp/models/{name}"
        if not os.path.exists(f"{local_dir}/config.json"):
            from huggingface_hub import snapshot_download
            try:
                snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))
            except Exception as e:
                # Per-variant boundary: a missing or gated repo must not
                # stop the remaining variants.
                print(f" ❌ Failed: download error: {e}")
                continue

        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
        semantic_dir = f"{OUT}/{name}_semantic"
        os.makedirs(semantic_dir, exist_ok=True)

        # Drop any output left by an earlier run so that the existence check
        # below only succeeds for a file produced by this invocation.
        try:
            os.remove(out_path)
        except FileNotFoundError:
            pass

        cmd = [
            sys.executable, "-m", "fish_speech.models.text2semantic.inference",
            "--text", f"<|speaker:0|>{GEN_TEXT}",
            "--prompt-audio", REF_AUDIO,
            "--prompt-text", REF_TEXT,
            "--checkpoint-path", local_dir,
            "--output-dir", semantic_dir,
            "--output", out_path,
            "--num-samples", "1",
            "--max-new-tokens", "1024",
            "--top-p", "0.7",
            "--top-k", "30",
            "--temperature", "0.7",
            "--no-iterative-prompt",
            "--chunk-length", "0",
            "--device", "cuda",
        ]
        env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
        except subprocess.TimeoutExpired:
            print(" ❌ Failed: inference timed out after 600s")
            continue

        if os.path.exists(out_path):
            import soundfile as sf
            data, sr = sf.read(out_path)
            dur = len(data) / sr
            print(f" ✅ {out_path} ({dur:.1f}s)")
        else:
            print(f" ❌ Failed: {result.stderr[-200:]}")
# === PART 2: GGUF models via s2.cpp ===
# Each entry is (variant label, GGUF file name); both are derived from the
# quantization tag, listed from highest to lowest bit width.
GGUF_MODELS = [
    (f"gguf_{quant}", f"s2-pro-{quant}.gguf")
    for quant in ("q8_0", "q6_k", "q5_k_m", "q4_k_m", "q3_k", "q2_k")
]
def build_s2cpp(s2dir="/tmp/s2.cpp"):
    """Build s2.cpp with CUDA support.

    If ``{s2dir}/build/s2`` already exists nothing is rebuilt. Otherwise the
    repository is cloned (skipped when ``{s2dir}/CMakeLists.txt`` is already
    present), configured with ``-DS2_CUDA=ON`` and built. The steps run in
    order and stop at the first one that times out, cannot be started, or
    exits non-zero; the reason is printed so a failed build can be diagnosed.

    Args:
        s2dir: Directory holding (or receiving) the s2.cpp source tree.

    Returns:
        Path of the built binary, or None when it does not exist after the
        build attempt.
    """
    print("\n Building s2.cpp with CUDA...")
    binary = f"{s2dir}/build/s2"

    if not os.path.exists(binary):
        # (label, command, working directory, timeout in seconds)
        steps = []
        if not os.path.exists(f"{s2dir}/CMakeLists.txt"):
            steps.append((
                "git clone",
                ["git", "clone", "--recurse-submodules",
                 "https://github.com/rodrigomatta/s2.cpp.git", s2dir],
                None,
                120,
            ))
        steps.append((
            "cmake configure",
            ["cmake", "-B", "build", "-DCMAKE_BUILD_TYPE=Release", "-DS2_CUDA=ON"],
            s2dir,
            60,
        ))
        steps.append((
            "cmake build",
            ["cmake", "--build", "build", "--parallel"],
            s2dir,
            300,
        ))

        for label, cmd, cwd, timeout in steps:
            try:
                result = subprocess.run(cmd, cwd=cwd, capture_output=True,
                                        text=True, timeout=timeout)
            except subprocess.TimeoutExpired:
                print(f" {label} timed out after {timeout}s")
                break
            except OSError as e:
                # Tool not installed, or the working directory is missing.
                print(f" {label} could not be started: {e}")
                break
            if result.returncode != 0:
                print(f" {label} failed: {result.stderr[-200:]}")
                break

    # The presence of the binary is what decides success, as callers only
    # need a runnable path.
    if os.path.exists(binary):
        print(" ✅ s2.cpp built")
        return binary
    return None
def gen_gguf_models():
    """Generate one voice-clone sample per entry in GGUF_MODELS via s2.cpp.

    Builds (or reuses) the s2.cpp binary, downloads tokenizer.json and each
    GGUF file from the ``rodrigomt/s2-pro-gguf`` repo into /tmp/gguf_models,
    and runs the binary once per variant with REF_AUDIO / REF_TEXT as the
    prompt and GEN_TEXT as the text (600 s timeout). The duration of
    ``{OUT}/fish_<name>_morgan_clone.wav`` is printed when the run produced
    it, otherwise the tail of the subprocess stderr.

    The function returns early when the build or the tokenizer download
    fails. A download error or timeout for a single variant is reported and
    the loop continues with the next one.

    Returns:
        None. Results are printed and written to disk.
    """
    print("\n" + "=" * 60)
    print(" PART 2: GGUF models via s2.cpp")
    print("=" * 60)

    s2bin = build_s2cpp()
    if not s2bin:
        print(" ❌ Failed to build s2.cpp")
        return

    from huggingface_hub import hf_hub_download
    gguf_dir = "/tmp/gguf_models"
    os.makedirs(gguf_dir, exist_ok=True)

    # The tokenizer is shared by every variant; without it nothing can run.
    try:
        tok_path = hf_hub_download("rodrigomt/s2-pro-gguf", "tokenizer.json", local_dir=gguf_dir)
    except Exception as e:
        print(f" ❌ Failed: tokenizer download error: {e}")
        return

    for name, gguf_file in GGUF_MODELS:
        print(f"\n [{name}] ({gguf_file})")

        try:
            model_path = hf_hub_download("rodrigomt/s2-pro-gguf", gguf_file, local_dir=gguf_dir)
        except Exception as e:
            # Per-variant boundary: one missing file must not stop the rest.
            print(f" ❌ Failed: download error: {e}")
            continue

        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"

        # Drop any output left by an earlier run so that the existence check
        # below only succeeds for a file produced by this invocation.
        try:
            os.remove(out_path)
        except FileNotFoundError:
            pass

        cmd = [
            s2bin,
            "-m", model_path,
            "-t", tok_path,
            "-pa", REF_AUDIO,
            "-pt", REF_TEXT,
            "-text", GEN_TEXT,
            "-c", "0",  # CUDA device 0
            "-o", out_path,
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
        except subprocess.TimeoutExpired:
            print(" ❌ Failed: s2.cpp timed out after 600s")
            continue

        if os.path.exists(out_path):
            import soundfile as sf
            data, sr = sf.read(out_path)
            dur = len(data) / sr
            print(f" ✅ {out_path} ({dur:.1f}s)")
        else:
            print(f" ❌ Failed: {result.stderr[-200:]}")
# === MAIN ===
def main():
    """Run both generation passes, then list and upload every sample.

    Prints a banner (including GPU name and VRAM when CUDA is available),
    calls gen_python_models() and gen_gguf_models(), prints duration and
    size of every .wav file in OUT, and uploads each of them to the
    ``Swagcrew/fish-speech-s2-quantized`` model repo under ``samples/``.
    Upload problems are reported per file and never raise.

    Returns:
        None.
    """
    print("=== Fish Speech S2 Pro - Full Quantization Comparison ===")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f}GB")
    else:
        # Querying device 0 without CUDA raises; keep the banner harmless
        # and let the generation steps report their own failures.
        print("GPU: none detected (CUDA unavailable)")
    print(f"Text: {GEN_TEXT}")
    print(f"Ref: {REF_AUDIO}")

    gen_python_models()
    gen_gguf_models()

    # Upload all samples
    print(f"\n{'='*60}")
    print(" UPLOADING ALL SAMPLES")
    print(f"{'='*60}")

    import soundfile as sf

    # List the output directory once; the summary and the upload both use it.
    wav_names = sorted(fn for fn in os.listdir(OUT) if fn.endswith(".wav"))

    for fn in wav_names:
        fpath = os.path.join(OUT, fn)
        data, sr = sf.read(fpath)
        dur = len(data) / sr
        sz = os.path.getsize(fpath) / 1024
        print(f" {fn}: {dur:.1f}s, {sz:.0f}KB")

    try:
        from huggingface_hub import HfApi
        api = HfApi()
    except Exception as e:
        print(f" Upload error: {e}")
    else:
        repo = "Swagcrew/fish-speech-s2-quantized"
        failures = 0
        for fn in wav_names:
            try:
                api.upload_file(
                    path_or_fileobj=os.path.join(OUT, fn),
                    path_in_repo=f"samples/{fn}",
                    repo_id=repo,
                    repo_type="model",
                )
            except Exception as e:
                # Keep going so one bad file does not block the others.
                failures += 1
                print(f" Upload error: {e}")
            else:
                print(f" Uploaded samples/{fn}")
        if not failures:
            # NOTE(review): the leading glyph looks like a mis-decoded emoji;
            # kept as found because the original character is not recoverable.
            print(f"\n π https://huggingface.co/{repo}/tree/main/samples")

    print("\nDONE!")


if __name__ == "__main__":
    main()
|