# Hugging Face Space: Swagcrew/fish-quant-samples
# Space status: Runtime error
# File size: 6,669 bytes

#!/usr/bin/env python3
"""Generate voice clone samples from ALL quantized Fish Speech S2 Pro variants."""
import os, sys, json, time, gc, traceback, subprocess
import torch

# Process-wide environment settings, applied at import time.
# NOTE(review): presumably these need to be in place before huggingface_hub is
# imported (it is imported lazily inside the functions below) — confirm.
os.environ.update({
    "TOKENIZERS_PARALLELISM": "false",
    "HF_HOME": "/tmp/hf_cache",
})

# Make the fish-speech checkout importable from this process.
sys.path.insert(0, "/app/fish-speech")

# Sentence that every model variant is asked to synthesize.
GEN_TEXT = "Every man's life ends the same way. It is only the details of how he lived that distinguish one man from another."

# Prompt audio and the prompt text passed alongside it to each model.
REF_AUDIO = "/app/reference/morgan_ref.wav"
REF_TEXT = "Let me get this straight. You think that your client, one of the wealthiest most powerful men in the world, is secretly a vigilante who spends his nights beating criminals to a pulp with his bare hands. And your plan is to blackmail this person."

# Directory that receives every generated sample; created up front.
OUT = "/tmp/samples"
os.makedirs(OUT, exist_ok=True)

# === PART 1: Python-based models (bf16, fp8, gptq) ===
# Each entry is (name, Hugging Face repo id). gen_python_models() uses the name
# for the local checkpoint directory and the output file names, and passes the
# repo id to snapshot_download().
PYTHON_MODELS = [
    ("baseline_bf16", "fishaudio/s2-pro"),
    ("fp8", "drbaph/s2-pro-fp8"),
    ("gptq_w4a16", "baicai1145/s2-pro-w4a16"),
]

def gen_python_models():
    """Generate one voice-clone sample per entry in PYTHON_MODELS.

    For each (name, model_id) pair the checkpoint is downloaded to
    /tmp/models/<name> unless a config.json is already present there, then
    `python -m fish_speech.models.text2semantic.inference` is run in a
    subprocess with REF_AUDIO / REF_TEXT as the prompt and GEN_TEXT as the
    text to speak. The expected output is OUT/fish_<name>_morgan_clone.wav.

    A failure for one model (download error, inference timeout, missing or
    unreadable output file) is reported on stdout and the loop continues, so
    the remaining variants are still attempted.

    Returns:
        None. Results are reported on stdout and as WAV files under OUT.
    """
    print("\n" + "="*60)
    print("  PART 1: Python-based models (bf16, fp8, gptq)")
    print("="*60)

    for name, model_id in PYTHON_MODELS:
        print(f"\n  [{name}] ({model_id})")

        local_dir = f"/tmp/models/{name}"
        if not os.path.exists(f"{local_dir}/config.json"):
            # Per-model boundary: a bad repo id, missing token or network
            # error must not abort the other variants.
            try:
                from huggingface_hub import snapshot_download
                snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))
            except Exception as e:
                print(f"    ❌ Failed: download error: {e}")
                continue

        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
        semantic_dir = f"{OUT}/{name}_semantic"
        os.makedirs(semantic_dir, exist_ok=True)

        cmd = [
            sys.executable, "-m", "fish_speech.models.text2semantic.inference",
            "--text", f"<|speaker:0|>{GEN_TEXT}",
            "--prompt-audio", REF_AUDIO,
            "--prompt-text", REF_TEXT,
            "--checkpoint-path", local_dir,
            "--output-dir", semantic_dir,
            "--output", out_path,
            "--num-samples", "1",
            "--max-new-tokens", "1024",
            "--top-p", "0.7",
            "--top-k", "30",
            "--temperature", "0.7",
            "--no-iterative-prompt",
            "--chunk-length", "0",
            "--device", "cuda",
        ]

        # The child process needs the fish-speech checkout on its import path.
        env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
        except subprocess.TimeoutExpired:
            # subprocess.run kills the child and raises on timeout; treat it
            # as a per-model failure instead of crashing the whole run.
            print("    ❌ Failed: inference timed out after 600s")
            continue

        if not os.path.exists(out_path):
            # Include the exit code: stderr can be empty when the child dies.
            print(f"    ❌ Failed (exit {result.returncode}): {result.stderr[-200:]}")
            continue

        import soundfile as sf
        try:
            data, sr = sf.read(out_path)
        except RuntimeError as e:
            # soundfile reports unreadable/truncated files via RuntimeError
            # subclasses.
            print(f"    ❌ Failed: output file is unreadable: {e}")
            continue
        dur = len(data) / sr
        print(f"    ✅ {out_path} ({dur:.1f}s)")

# === PART 2: GGUF models via s2.cpp ===
# Each entry is (name, GGUF file name). gen_gguf_models() uses the name for the
# output file name and fetches the file from the "rodrigomt/s2-pro-gguf" repo.
GGUF_MODELS = [
    ("gguf_q8_0", "s2-pro-q8_0.gguf"),
    ("gguf_q6_k", "s2-pro-q6_k.gguf"),
    ("gguf_q5_k_m", "s2-pro-q5_k_m.gguf"),
    ("gguf_q4_k_m", "s2-pro-q4_k_m.gguf"),
    ("gguf_q3_k", "s2-pro-q3_k.gguf"),
    ("gguf_q2_k", "s2-pro-q2_k.gguf"),
]

def build_s2cpp(s2dir="/tmp/s2.cpp"):
    """Build s2.cpp with CUDA support.

    If <s2dir>/build/s2 does not exist yet, the repository is cloned (unless
    a checkout is already present), configured with cmake (-DS2_CUDA=ON) and
    built. The steps run in order and stop at the first one that fails, times
    out, or cannot be started; the reason is printed.

    Args:
        s2dir: Directory holding the s2.cpp checkout and its build tree.
            Defaults to the location this script has always used.

    Returns:
        The path "<s2dir>/build/s2" if the binary exists after the attempt,
        otherwise None.
    """
    print("\n  Building s2.cpp with CUDA...")
    binary = f"{s2dir}/build/s2"

    if not os.path.exists(binary):
        # (command, working directory, timeout in seconds)
        steps = []
        if not os.path.isdir(os.path.join(s2dir, ".git")):
            # Only clone when no checkout exists; cloning into an existing
            # checkout would just fail.
            steps.append((
                ["git", "clone", "--recurse-submodules",
                 "https://github.com/rodrigomatta/s2.cpp.git", s2dir],
                None,
                120,
            ))
        steps.append((
            ["cmake", "-B", "build", "-DCMAKE_BUILD_TYPE=Release", "-DS2_CUDA=ON"],
            s2dir,
            60,
        ))
        steps.append((
            ["cmake", "--build", "build", "--parallel"],
            s2dir,
            300,
        ))

        for cmd, cwd, timeout in steps:
            label = " ".join(cmd[:2])
            try:
                proc = subprocess.run(
                    cmd, cwd=cwd, capture_output=True,
                    text=True, errors="replace", timeout=timeout,
                )
            except subprocess.TimeoutExpired:
                print(f"    ❌ `{label}` timed out after {timeout}s")
                break
            except OSError as e:
                # Tool not installed, or the working directory is missing.
                print(f"    ❌ `{label}` could not be started: {e}")
                break
            if proc.returncode != 0:
                # Later steps depend on this one, so do not continue.
                print(f"    ❌ `{label}` failed (exit {proc.returncode}): {proc.stderr[-200:]}")
                break

    if os.path.exists(binary):
        print("    ✅ s2.cpp built")
        return binary
    return None

def gen_gguf_models():
    """Generate one voice-clone sample per entry in GGUF_MODELS using s2.cpp.

    Builds the s2.cpp binary via build_s2cpp(), downloads tokenizer.json and
    each GGUF file from the "rodrigomt/s2-pro-gguf" repo into
    /tmp/gguf_models, and runs the binary once per model with REF_AUDIO /
    REF_TEXT as the prompt and GEN_TEXT as the text to speak. The expected
    output is OUT/fish_<name>_morgan_clone.wav.

    If the build or the tokenizer download fails, the function reports it and
    returns early. A failure for a single model (download error, timeout,
    binary cannot be started, missing or unreadable output) is reported and
    the loop continues with the next variant.

    Returns:
        None. Results are reported on stdout and as WAV files under OUT.
    """
    print("\n" + "="*60)
    print("  PART 2: GGUF models via s2.cpp")
    print("="*60)

    s2bin = build_s2cpp()
    if not s2bin:
        print("    ❌ Failed to build s2.cpp")
        return

    # Download GGUF models
    from huggingface_hub import hf_hub_download
    gguf_dir = "/tmp/gguf_models"
    os.makedirs(gguf_dir, exist_ok=True)

    # Download tokenizer; every model needs it, so nothing can run without it.
    try:
        tok_path = hf_hub_download("rodrigomt/s2-pro-gguf", "tokenizer.json", local_dir=gguf_dir)
    except Exception as e:
        print(f"    ❌ Failed: tokenizer download error: {e}")
        return

    for name, gguf_file in GGUF_MODELS:
        print(f"\n  [{name}] ({gguf_file})")

        # Download model; a missing file or network error affects only this
        # variant.
        try:
            model_path = hf_hub_download("rodrigomt/s2-pro-gguf", gguf_file, local_dir=gguf_dir)
        except Exception as e:
            print(f"    ❌ Failed: download error: {e}")
            continue
        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"

        cmd = [
            s2bin,
            "-m", model_path,
            "-t", tok_path,
            "-pa", REF_AUDIO,
            "-pt", REF_TEXT,
            "-text", GEN_TEXT,
            "-c", "0",  # CUDA device 0
            "-o", out_path,
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
        except subprocess.TimeoutExpired:
            # subprocess.run kills the child and raises on timeout; treat it
            # as a per-model failure instead of crashing the whole run.
            print("    ❌ Failed: s2.cpp timed out after 600s")
            continue
        except OSError as e:
            print(f"    ❌ Failed: could not start s2.cpp: {e}")
            continue

        if not os.path.exists(out_path):
            # Include the exit code: stderr can be empty when the child dies.
            print(f"    ❌ Failed (exit {result.returncode}): {result.stderr[-200:]}")
            continue

        import soundfile as sf
        try:
            data, sr = sf.read(out_path)
        except RuntimeError as e:
            # soundfile reports unreadable/truncated files via RuntimeError
            # subclasses.
            print(f"    ❌ Failed: output file is unreadable: {e}")
            continue
        dur = len(data) / sr
        print(f"    ✅ {out_path} ({dur:.1f}s)")

# === MAIN ===
def main():
    """Run the full comparison: generate all samples, summarize, upload.

    Prints a banner (including GPU details when CUDA is available), runs
    gen_python_models() and gen_gguf_models(), prints duration and size for
    every readable .wav file directly under OUT, and uploads those files to
    the "Swagcrew/fish-speech-s2-quantized" model repo under samples/.

    Unreadable WAV files are skipped with a message. Upload failures are
    reported per file, so one failed upload does not stop the others.
    """
    print("=== Fish Speech S2 Pro - Full Quantization Comparison ===")
    # Querying device 0 raises when CUDA is unavailable; the banner alone
    # should not crash the script.
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f}GB")
    else:
        print("GPU: none detected (CUDA unavailable)")
    print(f"Text: {GEN_TEXT}")
    print(f"Ref: {REF_AUDIO}")

    gen_python_models()
    gen_gguf_models()

    # Upload all samples
    print(f"\n{'='*60}")
    print("  UPLOADING ALL SAMPLES")
    print(f"{'='*60}")

    import soundfile as sf
    results = []  # (file name, duration in seconds, size in KB)
    for fn in sorted(os.listdir(OUT)):
        if not fn.endswith(".wav"):
            continue
        fpath = os.path.join(OUT, fn)
        try:
            data, sr = sf.read(fpath)
        except RuntimeError as e:
            # soundfile reports unreadable/truncated files via RuntimeError
            # subclasses; skip them rather than lose the whole summary.
            print(f"  Skipping unreadable {fn}: {e}")
            continue
        dur = len(data) / sr
        results.append((fn, dur, os.path.getsize(fpath)/1024))

    for fn, dur, sz in results:
        print(f"  {fn}: {dur:.1f}s, {sz:.0f}KB")

    try:
        from huggingface_hub import HfApi
        api = HfApi()
    except Exception as e:
        print(f"  Upload error: {e}")
    else:
        repo = "Swagcrew/fish-speech-s2-quantized"
        uploaded = 0
        # Reuse the list gathered above instead of listing OUT a second time.
        for fn, _dur, _sz in results:
            try:
                api.upload_file(
                    path_or_fileobj=os.path.join(OUT, fn),
                    path_in_repo=f"samples/{fn}",
                    repo_id=repo,
                    repo_type="model"
                )
            except Exception as e:
                # Best-effort upload: report and move on to the next file.
                print(f"  Upload error ({fn}): {e}")
                continue
            uploaded += 1
            print(f"  Uploaded samples/{fn}")
        if uploaded:
            print(f"\n  🔗 https://huggingface.co/{repo}/tree/main/samples")

    print("\nDONE!")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()