Spaces:

Swagcrew
/

fish-quant-samples

Runtime error

App Files Files Community

Swagcrew committed 9 days ago

Commit

696bef2

verified ·

1 Parent(s): 60a491a

Upload gen_samples.py with huggingface_hub

Browse files

Files changed (1) hide show

gen_samples.py +117 -24

gen_samples.py CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Generate voice clone samples using fish-speech CLI."""
 import os, sys, json, time, gc, traceback, subprocess
 import torch
@@ -10,41 +10,41 @@ sys.path.insert(0, "/app/fish-speech")
 GEN_TEXT = "Every man's life ends the same way. It is only the details of how he lived that distinguish one man from another."
 REF_TEXT = "Let me get this straight. You think that your client, one of the wealthiest most powerful men in the world, is secretly a vigilante who spends his nights beating criminals to a pulp with his bare hands. And your plan is to blackmail this person."
 OUT = "/tmp/samples"
 os.makedirs(OUT, exist_ok=True)
-MODELS = [
     ("baseline_bf16", "fishaudio/s2-pro"),
     ("fp8", "drbaph/s2-pro-fp8"),
 ]
-def main():
-    print(f"=== Fish Speech Voice Clone Sample Generator ===")
-    print(f"GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f}GB")
-    for name, model_id in MODELS:
-        print(f"\n{'='*60}")
-        print(f"  {name.upper()} ({model_id})")
-        print(f"{'='*60}")
         local_dir = f"/tmp/models/{name}"
         if not os.path.exists(f"{local_dir}/config.json"):
-            print(f"  Downloading {model_id}...")
             from huggingface_hub import snapshot_download
             snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))
         out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
-        # Step 1: Generate semantic tokens using the CLI
         semantic_dir = f"{OUT}/{name}_semantic"
         os.makedirs(semantic_dir, exist_ok=True)
         cmd = [
             sys.executable, "-m", "fish_speech.models.text2semantic.inference",
             "--text", f"<|speaker:0|>{GEN_TEXT}",
-            "--prompt-audio", "/app/reference/morgan_ref.wav",
             "--prompt-text", REF_TEXT,
             "--checkpoint-path", local_dir,
             "--output-dir", semantic_dir,
             "--num-samples", "1",
             "--max-new-tokens", "1024",
             "--top-p", "0.7",
@@ -53,27 +53,120 @@ def main():
             "--no-iterative-prompt",
             "--chunk-length", "0",
             "--device", "cuda",
-            "--output", out_path,
         ]
-        print(f"  Generating semantic tokens...")
         env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}
         result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
-        print(f"  CLI stdout (last 500): {result.stdout[-500:]}")
-        if result.stderr:
-            print(f"  CLI stderr (last 500): {result.stderr[-500:]}")
         if os.path.exists(out_path):
-            sz = os.path.getsize(out_path)
-            print(f"  ✅ Saved {out_path} ({sz/1024:.0f}KB)")
         else:
-            print(f"  ❌ Output not found: {out_path}")
-    # Upload
     print(f"\n{'='*60}")
-    print(f"  UPLOADING TO HUB")
     print(f"{'='*60}")
     try:
         from huggingface_hub import HfApi
         api = HfApi()
@@ -87,7 +180,7 @@ def main():
                     repo_type="model"
                 )
                 print(f"  Uploaded samples/{fn}")
-        print(f"\n  https://huggingface.co/{repo}/tree/main/samples")
     except Exception as e:
         print(f"  Upload error: {e}")

 #!/usr/bin/env python3
+"""Generate voice clone samples from ALL quantized Fish Speech S2 Pro variants."""
 import os, sys, json, time, gc, traceback, subprocess
 import torch
 GEN_TEXT = "Every man's life ends the same way. It is only the details of how he lived that distinguish one man from another."
 REF_TEXT = "Let me get this straight. You think that your client, one of the wealthiest most powerful men in the world, is secretly a vigilante who spends his nights beating criminals to a pulp with his bare hands. And your plan is to blackmail this person."
 OUT = "/tmp/samples"
+REF_AUDIO = "/app/reference/morgan_ref.wav"
 os.makedirs(OUT, exist_ok=True)
+# === PART 1: Python-based models (bf16, fp8, gptq) ===
+PYTHON_MODELS = [
     ("baseline_bf16", "fishaudio/s2-pro"),
     ("fp8", "drbaph/s2-pro-fp8"),
+    ("gptq_w4a16", "baicai1145/s2-pro-w4a16"),
 ]
+def gen_python_models():
+    print("\n" + "="*60)
+    print("  PART 1: Python-based models (bf16, fp8, gptq)")
+    print("="*60)
+    for name, model_id in PYTHON_MODELS:
+        print(f"\n  [{name}] ({model_id})")
         local_dir = f"/tmp/models/{name}"
         if not os.path.exists(f"{local_dir}/config.json"):
             from huggingface_hub import snapshot_download
             snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))
         out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
         semantic_dir = f"{OUT}/{name}_semantic"
         os.makedirs(semantic_dir, exist_ok=True)
         cmd = [
             sys.executable, "-m", "fish_speech.models.text2semantic.inference",
             "--text", f"<|speaker:0|>{GEN_TEXT}",
+            "--prompt-audio", REF_AUDIO,
             "--prompt-text", REF_TEXT,
             "--checkpoint-path", local_dir,
             "--output-dir", semantic_dir,
+            "--output", out_path,
             "--num-samples", "1",
             "--max-new-tokens", "1024",
             "--top-p", "0.7",
             "--no-iterative-prompt",
             "--chunk-length", "0",
             "--device", "cuda",
         ]
         env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}
         result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
+        if os.path.exists(out_path):
+            import soundfile as sf
+            data, sr = sf.read(out_path)
+            dur = len(data) / sr
+            print(f"    ✅ {out_path} ({dur:.1f}s)")
+        else:
+            print(f"    ❌ Failed: {result.stderr[-200:]}")
+# === PART 2: GGUF models via s2.cpp ===
+# Each entry is (sample name, GGUF filename). gen_gguf_models() downloads the
+# filename from the "rodrigomt/s2-pro-gguf" repo and uses the sample name to
+# build the output path fish_<name>_morgan_clone.wav.
+GGUF_MODELS = [
+    ("gguf_q8_0", "s2-pro-q8_0.gguf"),
+    ("gguf_q6_k", "s2-pro-q6_k.gguf"),
+    ("gguf_q5_k_m", "s2-pro-q5_k_m.gguf"),
+    ("gguf_q4_k_m", "s2-pro-q4_k_m.gguf"),
+    ("gguf_q3_k", "s2-pro-q3_k.gguf"),
+    ("gguf_q2_k", "s2-pro-q2_k.gguf"),
+]
+def build_s2cpp():
+    """Build s2.cpp with CUDA support."""
+    print("\n  Building s2.cpp with CUDA...")
+    s2dir = "/tmp/s2.cpp"
+    if not os.path.exists(f"{s2dir}/build/s2"):
+        subprocess.run(["git", "clone", "--recurse-submodules",
+                       "https://github.com/rodrigomatta/s2.cpp.git", s2dir],
+                      capture_output=True, timeout=120)
+        subprocess.run(["cmake", "-B", "build", "-DCMAKE_BUILD_TYPE=Release", "-DS2_CUDA=ON"],
+                      cwd=s2dir, capture_output=True, timeout=60)
+        subprocess.run(["cmake", "--build", "build", "--parallel"],
+                      cwd=s2dir, capture_output=True, timeout=300)
+    if os.path.exists(f"{s2dir}/build/s2"):
+        print("    ✅ s2.cpp built")
+        return f"{s2dir}/build/s2"
+    return None
+def gen_gguf_models():
+    print("\n" + "="*60)
+    print("  PART 2: GGUF models via s2.cpp")
+    print("="*60)
+    s2bin = build_s2cpp()
+    if not s2bin:
+        print("    ❌ Failed to build s2.cpp")
+        return
+    # Download GGUF models
+    from huggingface_hub import hf_hub_download
+    gguf_dir = "/tmp/gguf_models"
+    os.makedirs(gguf_dir, exist_ok=True)
+    # Download tokenizer
+    tok_path = hf_hub_download("rodrigomt/s2-pro-gguf", "tokenizer.json", local_dir=gguf_dir)
+    for name, gguf_file in GGUF_MODELS:
+        print(f"\n  [{name}] ({gguf_file})")
+        # Download model
+        model_path = hf_hub_download("rodrigomt/s2-pro-gguf", gguf_file, local_dir=gguf_dir)
+        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
+        cmd = [
+            s2bin,
+            "-m", model_path,
+            "-t", tok_path,
+            "-pa", REF_AUDIO,
+            "-pt", REF_TEXT,
+            "-text", GEN_TEXT,
+            "-c", "0",  # CUDA device 0
+            "-o", out_path,
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
         if os.path.exists(out_path):
+            import soundfile as sf
+            data, sr = sf.read(out_path)
+            dur = len(data) / sr
+            print(f"    ✅ {out_path} ({dur:.1f}s)")
         else:
+            print(f"    ❌ Failed: {result.stderr[-200:]}")
+# === MAIN ===
+def main():
+    print(f"=== Fish Speech S2 Pro - Full Quantization Comparison ===")
+    print(f"GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f}GB")
+    print(f"Text: {GEN_TEXT}")
+    print(f"Ref: {REF_AUDIO}")
+    gen_python_models()
+    gen_gguf_models()
+    # Upload all samples
     print(f"\n{'='*60}")
+    print(f"  UPLOADING ALL SAMPLES")
     print(f"{'='*60}")
+    import soundfile as sf
+    results = []
+    for fn in sorted(os.listdir(OUT)):
+        if fn.endswith(".wav"):
+            fpath = os.path.join(OUT, fn)
+            data, sr = sf.read(fpath)
+            dur = len(data) / sr
+            results.append((fn, dur, os.path.getsize(fpath)/1024))
+    for fn, dur, sz in results:
+        print(f"  {fn}: {dur:.1f}s, {sz:.0f}KB")
     try:
         from huggingface_hub import HfApi
         api = HfApi()
                     repo_type="model"
                 )
                 print(f"  Uploaded samples/{fn}")
+        print(f"\n  🔗 https://huggingface.co/{repo}/tree/main/samples")
     except Exception as e:
         print(f"  Upload error: {e}")