# fish-quant-samples / gen_samples.py
# Uploaded by Swagcrew with huggingface_hub (commit 696bef2, verified)
#!/usr/bin/env python3
"""Generate voice clone samples from ALL quantized Fish Speech S2 Pro variants."""
import os, sys, json, time, gc, traceback, subprocess
import torch
# Process-wide settings applied at import time, ahead of the lazy
# huggingface_hub / soundfile imports performed inside the functions below.
os.environ.update(
    {
        "TOKENIZERS_PARALLELISM": "false",
        "HF_HOME": "/tmp/hf_cache",
    }
)
sys.path.insert(0, "/app/fish-speech")

# Reference clip passed to every model as the voice prompt, and its transcript.
REF_AUDIO = "/app/reference/morgan_ref.wav"
REF_TEXT = (
    "Let me get this straight. You think that your client, one of the "
    "wealthiest most powerful men in the world, is secretly a vigilante who "
    "spends his nights beating criminals to a pulp with his bare hands. "
    "And your plan is to blackmail this person."
)

# Sentence every model variant is asked to speak.
GEN_TEXT = (
    "Every man's life ends the same way. It is only the details of how he "
    "lived that distinguish one man from another."
)

# All generated samples are written below this directory.
OUT = "/tmp/samples"
os.makedirs(OUT, exist_ok=True)
# === PART 1: Python-based models (bf16, fp8, gptq) ===
# Each entry is (sample label, Hugging Face repo id). The label is reused for
# the local checkpoint directory and for the output file name.
PYTHON_MODELS = [
    ("baseline_bf16", "fishaudio/s2-pro"),
    ("fp8", "drbaph/s2-pro-fp8"),
    ("gptq_w4a16", "baicai1145/s2-pro-w4a16"),
]
def gen_python_models():
    """Generate one voice-clone sample per entry in ``PYTHON_MODELS``.

    For each ``(name, model_id)`` pair the checkpoint is downloaded to
    ``/tmp/models/<name>`` (skipped when ``config.json`` is already there) and
    ``fish_speech.models.text2semantic.inference`` is run in a subprocess with
    the shared reference audio/text and ``GEN_TEXT``. The expected output file
    is ``<OUT>/fish_<name>_morgan_clone.wav``.

    Returns nothing; progress and failures are printed. A model whose
    subprocess fails or times out is reported and skipped so the remaining
    models still run.
    """
    print("\n" + "="*60)
    print(" PART 1: Python-based models (bf16, fp8, gptq)")
    print("="*60)

    timeout_s = 600

    for name, model_id in PYTHON_MODELS:
        print(f"\n [{name}] ({model_id})")

        local_dir = f"/tmp/models/{name}"
        if not os.path.exists(f"{local_dir}/config.json"):
            # Third-party imports stay lazy, as elsewhere in this script.
            from huggingface_hub import snapshot_download
            snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))

        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
        semantic_dir = f"{OUT}/{name}_semantic"
        os.makedirs(semantic_dir, exist_ok=True)

        # Success is detected by the output file existing, so a file left over
        # from an earlier run must not be mistaken for a result of this run.
        if os.path.exists(out_path):
            os.remove(out_path)

        cmd = [
            sys.executable, "-m", "fish_speech.models.text2semantic.inference",
            "--text", f"<|speaker:0|>{GEN_TEXT}",
            "--prompt-audio", REF_AUDIO,
            "--prompt-text", REF_TEXT,
            "--checkpoint-path", local_dir,
            "--output-dir", semantic_dir,
            "--output", out_path,
            "--num-samples", "1",
            "--max-new-tokens", "1024",
            "--top-p", "0.7",
            "--top-k", "30",
            "--temperature", "0.7",
            "--no-iterative-prompt",
            "--chunk-length", "0",
            "--device", "cuda",
        ]
        env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout_s, env=env)
        except subprocess.TimeoutExpired:
            # One slow model must not abort the remaining ones.
            print(f" ❌ Failed: timed out after {timeout_s}s")
            continue

        if os.path.exists(out_path):
            import soundfile as sf
            data, sr = sf.read(out_path)
            dur = len(data) / sr
            print(f" ✅ {out_path} ({dur:.1f}s)")
        else:
            print(f" ❌ Failed (exit {result.returncode}): {result.stderr[-200:]}")
# === PART 2: GGUF models via s2.cpp ===
# Each entry is (sample label, file name inside the GGUF repo), listed from
# the highest-bit quantization (q8_0) down to the lowest (q2_k).
GGUF_MODELS = [
    ("gguf_q8_0", "s2-pro-q8_0.gguf"),
    ("gguf_q6_k", "s2-pro-q6_k.gguf"),
    ("gguf_q5_k_m", "s2-pro-q5_k_m.gguf"),
    ("gguf_q4_k_m", "s2-pro-q4_k_m.gguf"),
    ("gguf_q3_k", "s2-pro-q3_k.gguf"),
    ("gguf_q2_k", "s2-pro-q2_k.gguf"),
]
def build_s2cpp():
    """Build s2.cpp with CUDA support.

    Unless ``/tmp/s2.cpp/build/s2`` already exists, clones the repository
    (with submodules) into ``/tmp/s2.cpp`` and runs the CMake configure and
    build steps there. The first step that fails, times out, or cannot be
    started is reported with the tail of its stderr and ends the attempt.

    Returns:
        Path to the ``s2`` binary, or ``None`` when it could not be built.
    """
    print("\n Building s2.cpp with CUDA...")
    s2dir = "/tmp/s2.cpp"
    s2bin = f"{s2dir}/build/s2"

    if not os.path.exists(s2bin):
        # Each step is (label, command, working directory, timeout in seconds).
        steps = []
        # `git clone` refuses to write into an existing non-empty directory,
        # so reuse a checkout left behind by an earlier, unfinished build.
        if not os.path.isdir(f"{s2dir}/.git"):
            steps.append((
                "git clone",
                ["git", "clone", "--recurse-submodules",
                 "https://github.com/rodrigomatta/s2.cpp.git", s2dir],
                None,
                120,
            ))
        steps.append((
            "cmake configure",
            ["cmake", "-B", "build", "-DCMAKE_BUILD_TYPE=Release", "-DS2_CUDA=ON"],
            s2dir,
            60,
        ))
        steps.append((
            "cmake build",
            ["cmake", "--build", "build", "--parallel"],
            s2dir,
            300,
        ))

        for label, cmd, cwd, timeout_s in steps:
            try:
                proc = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, timeout=timeout_s)
            except subprocess.TimeoutExpired:
                print(f" ❌ {label} timed out after {timeout_s}s")
                return None
            except OSError as exc:
                # Raised e.g. when git or cmake is not installed.
                print(f" ❌ {label} could not be started: {exc}")
                return None
            if proc.returncode != 0:
                print(f" ❌ {label} failed (exit {proc.returncode}): {proc.stderr[-200:]}")
                return None

    if os.path.exists(s2bin):
        print(" ✅ s2.cpp built")
        return s2bin
    return None
def gen_gguf_models():
    """Generate one voice-clone sample per entry in ``GGUF_MODELS`` via s2.cpp.

    Builds the ``s2`` binary with ``build_s2cpp()``, downloads the tokenizer
    and each GGUF file from ``rodrigomt/s2-pro-gguf`` into
    ``/tmp/gguf_models``, and runs the binary once per model with the shared
    reference audio/text and ``GEN_TEXT``. The expected output file is
    ``<OUT>/fish_<name>_morgan_clone.wav``.

    Returns nothing; progress and failures are printed. If the binary cannot
    be built the function returns early. A model whose run fails or times out
    is reported and skipped so the remaining models still run.
    """
    print("\n" + "="*60)
    print(" PART 2: GGUF models via s2.cpp")
    print("="*60)

    s2bin = build_s2cpp()
    if not s2bin:
        print(" ❌ Failed to build s2.cpp")
        return

    # Download GGUF models
    from huggingface_hub import hf_hub_download
    gguf_repo = "rodrigomt/s2-pro-gguf"
    gguf_dir = "/tmp/gguf_models"
    os.makedirs(gguf_dir, exist_ok=True)

    # Download tokenizer (shared by every quantization level)
    tok_path = hf_hub_download(gguf_repo, "tokenizer.json", local_dir=gguf_dir)

    timeout_s = 600

    for name, gguf_file in GGUF_MODELS:
        print(f"\n [{name}] ({gguf_file})")

        # Download model
        model_path = hf_hub_download(gguf_repo, gguf_file, local_dir=gguf_dir)
        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"

        # Success is detected by the output file existing, so a file left over
        # from an earlier run must not be mistaken for a result of this run.
        if os.path.exists(out_path):
            os.remove(out_path)

        cmd = [
            s2bin,
            "-m", model_path,
            "-t", tok_path,
            "-pa", REF_AUDIO,
            "-pt", REF_TEXT,
            "-text", GEN_TEXT,
            "-c", "0",  # CUDA device 0
            "-o", out_path,
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout_s)
        except subprocess.TimeoutExpired:
            # One slow model must not abort the remaining ones.
            print(f" ❌ Failed: timed out after {timeout_s}s")
            continue

        if os.path.exists(out_path):
            import soundfile as sf
            data, sr = sf.read(out_path)
            dur = len(data) / sr
            print(f" ✅ {out_path} ({dur:.1f}s)")
        else:
            print(f" ❌ Failed (exit {result.returncode}): {result.stderr[-200:]}")
# === MAIN ===
def main():
    """Run both generation passes, then summarize and upload the samples.

    Prints the GPU name and memory, runs ``gen_python_models()`` and
    ``gen_gguf_models()``, lists every ``.wav`` directly under ``OUT`` with its
    duration and size, and uploads those files to the
    ``Swagcrew/fish-speech-s2-quantized`` model repo under ``samples/``.
    Upload errors are printed rather than raised.
    """
    print("=== Fish Speech S2 Pro - Full Quantization Comparison ===")
    print(f"GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f}GB")
    print(f"Text: {GEN_TEXT}")
    print(f"Ref: {REF_AUDIO}")

    gen_python_models()
    gen_gguf_models()

    # Upload all samples
    print(f"\n{'='*60}")
    print(" UPLOADING ALL SAMPLES")
    print(f"{'='*60}")

    import soundfile as sf

    # List the directory once so the summary and the upload cover the same files.
    wav_files = sorted(fn for fn in os.listdir(OUT) if fn.endswith(".wav"))

    results = []
    for fn in wav_files:
        fpath = os.path.join(OUT, fn)
        data, sr = sf.read(fpath)
        dur = len(data) / sr
        results.append((fn, dur, os.path.getsize(fpath)/1024))
    for fn, dur, sz in results:
        print(f" {fn}: {dur:.1f}s, {sz:.0f}KB")

    # Uploading is best-effort: the samples stay on disk if it fails, so any
    # error is reported and the script still finishes.
    try:
        from huggingface_hub import HfApi
        api = HfApi()
        repo = "Swagcrew/fish-speech-s2-quantized"
        for fn in wav_files:
            api.upload_file(
                path_or_fileobj=os.path.join(OUT, fn),
                path_in_repo=f"samples/{fn}",
                repo_id=repo,
                repo_type="model",
            )
            print(f" Uploaded samples/{fn}")
        print(f"\n 🔗 https://huggingface.co/{repo}/tree/main/samples")
    except Exception as e:
        print(f" Upload error: {e}")

    print("\nDONE!")


if __name__ == "__main__":
    main()