Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """Generate voice clone samples from ALL quantized Fish Speech S2 Pro variants.""" | |
| import os, sys, json, time, gc, traceback, subprocess | |
| import torch | |
# Process-wide settings, applied in the same order as before: tokenizers
# parallelism flag first, then the Hugging Face cache location.
os.environ.update(
    TOKENIZERS_PARALLELISM="false",
    HF_HOME="/tmp/hf_cache",
)

# Put the fish-speech checkout first on the import path.
sys.path.insert(0, "/app/fish-speech")

# Sentence every model variant is asked to speak.
GEN_TEXT = (
    "Every man's life ends the same way. "
    "It is only the details of how he lived that distinguish one man from another."
)

# Transcript passed alongside the reference recording (REF_AUDIO).
REF_TEXT = (
    "Let me get this straight. "
    "You think that your client, one of the wealthiest most powerful men in the world, "
    "is secretly a vigilante who spends his nights beating criminals to a pulp "
    "with his bare hands. "
    "And your plan is to blackmail this person."
)

# Directory that receives every generated sample, and the reference recording.
OUT = "/tmp/samples"
REF_AUDIO = "/app/reference/morgan_ref.wav"

os.makedirs(OUT, exist_ok=True)
# === PART 1: Python-based models (bf16, fp8, gptq) ===
# Each entry is (label, Hugging Face repo id). The label names the local
# checkpoint directory and the output files; the repo id is what gets
# downloaded. Dicts keep insertion order, so the list order is unchanged.
PYTHON_MODELS = list(
    {
        "baseline_bf16": "fishaudio/s2-pro",
        "fp8": "drbaph/s2-pro-fp8",
        "gptq_w4a16": "baicai1145/s2-pro-w4a16",
    }.items()
)
def gen_python_models():
    """Generate one voice-clone sample per entry in PYTHON_MODELS.

    For each (label, repo id) pair the checkpoint is downloaded to
    ``/tmp/models/<label>`` unless its ``config.json`` is already there, then
    ``fish_speech.models.text2semantic.inference`` is run in a child
    interpreter and the resulting WAV is reported with its duration.

    The run is best-effort: a download error, a timeout, or an unreadable
    output file is reported for that model and the loop moves on, so one bad
    variant does not prevent the remaining ones from being generated.

    Returns:
        None. Results are written under ``OUT`` and progress is printed.
    """
    print("\n" + "="*60)
    print(" PART 1: Python-based models (bf16, fp8, gptq)")
    print("="*60)

    # The child interpreter must be able to import fish_speech from the
    # checkout; this does not depend on the model, so build it once.
    env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}

    for name, model_id in PYTHON_MODELS:
        print(f"\n [{name}] ({model_id})")
        local_dir = f"/tmp/models/{name}"
        if not os.path.exists(f"{local_dir}/config.json"):
            try:
                from huggingface_hub import snapshot_download
                snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))
            except Exception as exc:
                # Per-model boundary: report and carry on with the next variant.
                print(f" β Failed: could not download {model_id}: {exc}")
                continue

        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
        semantic_dir = f"{OUT}/{name}_semantic"
        os.makedirs(semantic_dir, exist_ok=True)

        # Success is judged by the output file existing, so remove any file
        # left by an earlier run; otherwise a failed run would look successful.
        try:
            os.remove(out_path)
        except FileNotFoundError:
            pass

        # NOTE(review): the flags are passed through unchanged; confirm they
        # match the CLI of the fish-speech revision installed in /app/fish-speech.
        cmd = [
            sys.executable, "-m", "fish_speech.models.text2semantic.inference",
            "--text", f"<|speaker:0|>{GEN_TEXT}",
            "--prompt-audio", REF_AUDIO,
            "--prompt-text", REF_TEXT,
            "--checkpoint-path", local_dir,
            "--output-dir", semantic_dir,
            "--output", out_path,
            "--num-samples", "1",
            "--max-new-tokens", "1024",
            "--top-p", "0.7",
            "--top-k", "30",
            "--temperature", "0.7",
            "--no-iterative-prompt",
            "--chunk-length", "0",
            "--device", "cuda",
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
        except subprocess.TimeoutExpired:
            print(" β Failed: inference timed out after 600s")
            continue

        if not os.path.exists(out_path):
            print(f" β Failed: {result.stderr[-200:]}")
            continue

        import soundfile as sf
        try:
            data, sr = sf.read(out_path)
        except RuntimeError as exc:
            # soundfile reports undecodable files with RuntimeError subclasses.
            print(f" β Failed: unreadable output {out_path}: {exc}")
            continue
        dur = len(data) / sr
        print(f" β {out_path} ({dur:.1f}s)")
# === PART 2: GGUF models via s2.cpp ===
# Each entry is (label, GGUF file name). Both are derived from the
# quantization tag, listed here from the largest bit-width to the smallest.
GGUF_MODELS = [
    (f"gguf_{tag}", f"s2-pro-{tag}.gguf")
    for tag in ("q8_0", "q6_k", "q5_k_m", "q4_k_m", "q3_k", "q2_k")
]
def build_s2cpp():
    """Build s2.cpp with CUDA support.

    Clones the repository into ``/tmp/s2.cpp`` (skipped when a checkout is
    already there), configures it with ``-DS2_CUDA=ON`` and builds it. If
    ``build/s2`` already exists, nothing is run.

    A step that times out, cannot be started, or exits non-zero is reported
    with the tail of its stderr and the remaining steps are skipped; the
    function never raises for those cases.

    Returns:
        Path to the ``s2`` executable if it exists after the attempt,
        otherwise ``None``.
    """
    print("\n Building s2.cpp with CUDA...")
    s2dir = "/tmp/s2.cpp"
    binary = f"{s2dir}/build/s2"

    if not os.path.exists(binary):
        steps = []
        # Cloning into an existing checkout always fails, so only clone when
        # there is no repository yet (e.g. an earlier run cloned but did not
        # finish building).
        if not os.path.isdir(f"{s2dir}/.git"):
            steps.append((
                "git clone",
                ["git", "clone", "--recurse-submodules",
                 "https://github.com/rodrigomatta/s2.cpp.git", s2dir],
                None,
                120,
            ))
        steps.append((
            "cmake configure",
            ["cmake", "-B", "build", "-DCMAKE_BUILD_TYPE=Release", "-DS2_CUDA=ON"],
            s2dir,
            60,
        ))
        steps.append((
            "cmake build",
            ["cmake", "--build", "build", "--parallel"],
            s2dir,
            300,
        ))

        for label, cmd, cwd, limit in steps:
            try:
                proc = subprocess.run(
                    cmd, cwd=cwd, capture_output=True,
                    text=True, errors="replace", timeout=limit,
                )
            except subprocess.TimeoutExpired:
                print(f" β {label} timed out after {limit}s")
                break
            except OSError as exc:
                # Missing executable, or a missing cwd after a failed clone.
                print(f" β {label} could not be started: {exc}")
                break
            if proc.returncode != 0:
                # Later steps depend on this one, so stop at the first failure.
                print(f" β {label} failed: {proc.stderr[-200:]}")
                break

    if os.path.exists(binary):
        print(" β s2.cpp built")
        return binary
    return None
def gen_gguf_models():
    """Generate one voice-clone sample per entry in GGUF_MODELS using s2.cpp.

    Builds (or reuses) the ``s2`` binary, downloads the tokenizer and each
    GGUF file from ``rodrigomt/s2-pro-gguf`` into ``/tmp/gguf_models``, runs
    the binary once per model and reports the resulting WAV with its duration.

    The run is best-effort: if the build or the tokenizer download fails the
    whole part is skipped with a message; a failed model download, a timeout,
    or an unreadable output file only skips that model.

    Returns:
        None. Results are written under ``OUT`` and progress is printed.
    """
    print("\n" + "="*60)
    print(" PART 2: GGUF models via s2.cpp")
    print("="*60)
    s2bin = build_s2cpp()
    if not s2bin:
        print(" β Failed to build s2.cpp")
        return

    # Download GGUF models
    from huggingface_hub import hf_hub_download
    gguf_dir = "/tmp/gguf_models"
    os.makedirs(gguf_dir, exist_ok=True)

    # Download tokenizer; every model needs it, so give up on this part if
    # it cannot be fetched.
    try:
        tok_path = hf_hub_download("rodrigomt/s2-pro-gguf", "tokenizer.json", local_dir=gguf_dir)
    except Exception as exc:
        print(f" β Failed: could not download tokenizer.json: {exc}")
        return

    for name, gguf_file in GGUF_MODELS:
        print(f"\n [{name}] ({gguf_file})")
        # Download model
        try:
            model_path = hf_hub_download("rodrigomt/s2-pro-gguf", gguf_file, local_dir=gguf_dir)
        except Exception as exc:
            # Per-model boundary: report and carry on with the next variant.
            print(f" β Failed: could not download {gguf_file}: {exc}")
            continue

        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
        # Success is judged by the output file existing, so remove any file
        # left by an earlier run; otherwise a failed run would look successful.
        try:
            os.remove(out_path)
        except FileNotFoundError:
            pass

        cmd = [
            s2bin,
            "-m", model_path,
            "-t", tok_path,
            "-pa", REF_AUDIO,
            "-pt", REF_TEXT,
            "-text", GEN_TEXT,
            "-c", "0",  # CUDA device 0
            "-o", out_path,
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
        except subprocess.TimeoutExpired:
            print(" β Failed: s2 timed out after 600s")
            continue
        except OSError as exc:
            print(f" β Failed: could not start {s2bin}: {exc}")
            continue

        if not os.path.exists(out_path):
            print(f" β Failed: {result.stderr[-200:]}")
            continue

        import soundfile as sf
        try:
            data, sr = sf.read(out_path)
        except RuntimeError as exc:
            # soundfile reports undecodable files with RuntimeError subclasses.
            print(f" β Failed: unreadable output {out_path}: {exc}")
            continue
        dur = len(data) / sr
        print(f" β {out_path} ({dur:.1f}s)")
# === MAIN ===
def _upload_samples(filenames):
    """Upload the named WAV files from OUT to the samples/ folder of the model repo.

    Each file is uploaded independently, so one failed upload is reported and
    the remaining files are still attempted.
    """
    try:
        from huggingface_hub import HfApi
        api = HfApi()
    except Exception as e:
        print(f" Upload error: {e}")
        return

    repo = "Swagcrew/fish-speech-s2-quantized"
    uploaded = 0
    for fn in filenames:
        try:
            api.upload_file(
                path_or_fileobj=os.path.join(OUT, fn),
                path_in_repo=f"samples/{fn}",
                repo_id=repo,
                repo_type="model",
            )
        except Exception as e:
            # Top-level boundary for a best-effort upload: report and continue.
            print(f" Upload error: {fn}: {e}")
            continue
        uploaded += 1
        print(f" Uploaded samples/{fn}")
    if uploaded:
        print(f"\n π https://huggingface.co/{repo}/tree/main/samples")


def main():
    """Run both generation parts, then summarise and upload every WAV in OUT.

    Prints the GPU name and memory (this requires a CUDA device and raises
    otherwise), generates samples with the Python-based and the GGUF models,
    lists each readable ``.wav`` in ``OUT`` with its duration and size, and
    uploads those files.
    """
    print("=== Fish Speech S2 Pro - Full Quantization Comparison ===")
    print(f"GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f}GB")
    print(f"Text: {GEN_TEXT}")
    print(f"Ref: {REF_AUDIO}")
    gen_python_models()
    gen_gguf_models()

    # Upload all samples
    print(f"\n{'='*60}")
    print(" UPLOADING ALL SAMPLES")
    print(f"{'='*60}")
    import soundfile as sf

    # Scan the directory once; the same list drives the summary and the upload.
    results = []
    for fn in sorted(os.listdir(OUT)):
        if not fn.endswith(".wav"):
            continue
        fpath = os.path.join(OUT, fn)
        try:
            data, sr = sf.read(fpath)
        except RuntimeError as e:
            # An undecodable file should not stop the other samples from
            # being listed and uploaded.
            print(f" {fn}: unreadable, skipping ({e})")
            continue
        results.append((fn, len(data) / sr, os.path.getsize(fpath) / 1024))

    for fn, dur, sz in results:
        print(f" {fn}: {dur:.1f}s, {sz:.0f}KB")

    if results:
        _upload_samples([fn for fn, _dur, _sz in results])
    else:
        print(" No samples to upload")
    print("\nDONE!")


if __name__ == "__main__":
    main()