Spaces:

Swagcrew
/

fish-quant-samples

Runtime error

App Files Files Community

fish-quant-samples / gen_samples.py

Swagcrew

Upload gen_samples.py with huggingface_hub

696bef2 verified 4 days ago

raw

history blame contribute delete

6.67 kB

	#!/usr/bin/env python3
	"""Generate voice clone samples from ALL quantized Fish Speech S2 Pro variants."""
	import os, sys, json, time, gc, traceback, subprocess
	import torch

	# Process-wide configuration. The Hugging Face imports in this file are
	# function-local, so these variables are set before any of them runs.
	os.environ.update({
	    "TOKENIZERS_PARALLELISM": "false",
	    "HF_HOME": "/tmp/hf_cache",
	})
	# Put the fish-speech checkout first on the import path.
	sys.path.insert(0, "/app/fish-speech")

	# Sentence every model variant is asked to speak.
	GEN_TEXT = (
	    "Every man's life ends the same way. "
	    "It is only the details of how he lived "
	    "that distinguish one man from another."
	)
	# Text passed as the prompt text alongside REF_AUDIO.
	REF_TEXT = (
	    "Let me get this straight. "
	    "You think that your client, one of the wealthiest most powerful men in the world, "
	    "is secretly a vigilante who spends his nights beating criminals to a pulp "
	    "with his bare hands. "
	    "And your plan is to blackmail this person."
	)
	# Directory that receives every generated .wav (and is later scanned for upload).
	OUT = "/tmp/samples"
	# Reference recording used as the voice prompt.
	REF_AUDIO = "/app/reference/morgan_ref.wav"
	os.makedirs(OUT, exist_ok=True)

	# === PART 1: Python-based models (bf16, fp8, gptq) ===
	# (sample name, Hugging Face repo id) pairs. The name is used for the local
	# checkpoint directory and for the output .wav file name.
	PYTHON_MODELS = [
	    ("baseline_bf16", "fishaudio/s2-pro"),
	    ("fp8", "drbaph/s2-pro-fp8"),
	    ("gptq_w4a16", "baicai1145/s2-pro-w4a16"),
	]

	def gen_python_models():
	    """Generate one voice-clone sample for every entry in PYTHON_MODELS.

	    For each (name, model_id) pair:
	      1. download the checkpoint into /tmp/models/<name> unless a
	         config.json is already present there;
	      2. run fish_speech.models.text2semantic.inference in a subprocess
	         with the reference audio/text as prompt;
	      3. report the duration of OUT/fish_<name>_morgan_clone.wav on success.

	    A variant that times out, exits non-zero, or produces no output file is
	    reported and skipped; the remaining variants are still attempted.
	    """
	    print("\n" + "="*60)
	    print(" PART 1: Python-based models (bf16, fp8, gptq)")
	    print("="*60)

	    timeout_s = 600

	    for name, model_id in PYTHON_MODELS:
	        print(f"\n [{name}] ({model_id})")

	        local_dir = f"/tmp/models/{name}"
	        if not os.path.exists(f"{local_dir}/config.json"):
	            from huggingface_hub import snapshot_download
	            snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))

	        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
	        semantic_dir = f"{OUT}/{name}_semantic"
	        os.makedirs(semantic_dir, exist_ok=True)

	        cmd = [
	            sys.executable, "-m", "fish_speech.models.text2semantic.inference",
	            # Plain pipes here: "\|" is not a Python escape sequence, so the
	            # backslashes would be sent to the model as part of the tag.
	            "--text", f"<|speaker:0|>{GEN_TEXT}",
	            "--prompt-audio", REF_AUDIO,
	            "--prompt-text", REF_TEXT,
	            "--checkpoint-path", local_dir,
	            "--output-dir", semantic_dir,
	            "--output", out_path,
	            "--num-samples", "1",
	            "--max-new-tokens", "1024",
	            "--top-p", "0.7",
	            "--top-k", "30",
	            "--temperature", "0.7",
	            "--no-iterative-prompt",
	            "--chunk-length", "0",
	            "--device", "cuda",
	        ]

	        env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}
	        try:
	            result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout_s, env=env)
	        except subprocess.TimeoutExpired:
	            # One slow variant must not abort the whole comparison run.
	            print(f" ❌ Failed: timed out after {timeout_s}s")
	            continue

	        # Require a clean exit as well as the file: a .wav left over from an
	        # earlier run would otherwise be reported as a fresh success.
	        if result.returncode == 0 and os.path.exists(out_path):
	            import soundfile as sf
	            data, sr = sf.read(out_path)
	            dur = len(data) / sr
	            print(f" ✅ {out_path} ({dur:.1f}s)")
	        else:
	            print(f" ❌ Failed (exit {result.returncode}): {result.stderr[-200:]}")

	# === PART 2: GGUF models via s2.cpp ===
	# (sample name, GGUF file name) pairs. Each file is downloaded by
	# gen_gguf_models and the name is used for the output .wav file name.
	GGUF_MODELS = [
	    ("gguf_q8_0", "s2-pro-q8_0.gguf"),
	    ("gguf_q6_k", "s2-pro-q6_k.gguf"),
	    ("gguf_q5_k_m", "s2-pro-q5_k_m.gguf"),
	    ("gguf_q4_k_m", "s2-pro-q4_k_m.gguf"),
	    ("gguf_q3_k", "s2-pro-q3_k.gguf"),
	    ("gguf_q2_k", "s2-pro-q2_k.gguf"),
	]

	def build_s2cpp():
	    """Build s2.cpp with CUDA support.

	    Clones the repository into /tmp/s2.cpp (skipped when a checkout with a
	    CMakeLists.txt is already there), then configures and builds it with
	    CMake. Nothing is run when /tmp/s2.cpp/build/s2 already exists.

	    Returns:
	        The path to the built binary, or None when a step fails, times out,
	        cannot be started, or the binary is missing afterwards.
	    """
	    print("\n Building s2.cpp with CUDA...")
	    s2dir = "/tmp/s2.cpp"
	    s2bin = f"{s2dir}/build/s2"

	    if not os.path.exists(s2bin):
	        # (label, command, working directory, timeout in seconds)
	        steps = []
	        # Cloning into an existing checkout fails, so only clone when the
	        # tree the configure step needs is not there yet.
	        if not os.path.exists(f"{s2dir}/CMakeLists.txt"):
	            steps.append((
	                "clone",
	                ["git", "clone", "--recurse-submodules",
	                 "https://github.com/rodrigomatta/s2.cpp.git", s2dir],
	                None,
	                120,
	            ))
	        steps.append((
	            "configure",
	            ["cmake", "-B", "build", "-DCMAKE_BUILD_TYPE=Release", "-DS2_CUDA=ON"],
	            s2dir,
	            60,
	        ))
	        steps.append((
	            "build",
	            ["cmake", "--build", "build", "--parallel"],
	            s2dir,
	            300,
	        ))

	        for label, cmd, cwd, timeout_s in steps:
	            try:
	                result = subprocess.run(cmd, cwd=cwd, capture_output=True,
	                                        text=True, timeout=timeout_s)
	            except subprocess.TimeoutExpired:
	                print(f" ❌ s2.cpp {label} timed out after {timeout_s}s")
	                return None
	            except OSError as exc:
	                # e.g. git or cmake is not installed
	                print(f" ❌ s2.cpp {label} could not be started: {exc}")
	                return None
	            if result.returncode != 0:
	                # Output is captured, so surface the tail of stderr here;
	                # otherwise the reason for the failure is lost.
	                print(f" ❌ s2.cpp {label} failed (exit {result.returncode}): {result.stderr[-200:]}")
	                return None

	    if os.path.exists(s2bin):
	        print(" ✅ s2.cpp built")
	        return s2bin
	    return None

	def gen_gguf_models():
	    """Generate one voice-clone sample for every entry in GGUF_MODELS via s2.cpp.

	    Builds the s2 binary with build_s2cpp, downloads tokenizer.json and each
	    GGUF file from the rodrigomt/s2-pro-gguf repo into /tmp/gguf_models, and
	    runs the binary with the reference audio/text as prompt, writing
	    OUT/fish_<name>_morgan_clone.wav.

	    Returns early when the binary cannot be built. A variant whose download
	    fails, that times out, exits non-zero, or produces no output file is
	    reported and skipped; the remaining variants are still attempted.
	    """
	    print("\n" + "="*60)
	    print(" PART 2: GGUF models via s2.cpp")
	    print("="*60)

	    s2bin = build_s2cpp()
	    if not s2bin:
	        print(" ❌ Failed to build s2.cpp")
	        return

	    # Download GGUF models
	    from huggingface_hub import hf_hub_download
	    gguf_repo = "rodrigomt/s2-pro-gguf"
	    gguf_dir = "/tmp/gguf_models"
	    os.makedirs(gguf_dir, exist_ok=True)

	    # Download tokenizer (shared by every variant, so a failure here is fatal)
	    tok_path = hf_hub_download(gguf_repo, "tokenizer.json", local_dir=gguf_dir)

	    timeout_s = 600

	    for name, gguf_file in GGUF_MODELS:
	        print(f"\n [{name}] ({gguf_file})")

	        # Download model. Best-effort per variant: one missing or
	        # unreachable file must not prevent the other quantizations from running.
	        try:
	            model_path = hf_hub_download(gguf_repo, gguf_file, local_dir=gguf_dir)
	        except Exception as exc:
	            print(f" ❌ Failed: download error: {exc}")
	            continue

	        out_path = f"{OUT}/fish_{name}_morgan_clone.wav"

	        cmd = [
	            s2bin,
	            "-m", model_path,
	            "-t", tok_path,
	            "-pa", REF_AUDIO,
	            "-pt", REF_TEXT,
	            "-text", GEN_TEXT,
	            "-c", "0",  # CUDA device 0
	            "-o", out_path,
	        ]

	        try:
	            result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout_s)
	        except subprocess.TimeoutExpired:
	            print(f" ❌ Failed: timed out after {timeout_s}s")
	            continue

	        # Require a clean exit as well as the file: a .wav left over from an
	        # earlier run would otherwise be reported as a fresh success.
	        if result.returncode == 0 and os.path.exists(out_path):
	            import soundfile as sf
	            data, sr = sf.read(out_path)
	            dur = len(data) / sr
	            print(f" ✅ {out_path} ({dur:.1f}s)")
	        else:
	            print(f" ❌ Failed (exit {result.returncode}): {result.stderr[-200:]}")

	# === MAIN ===
	def main():
	    """Run both generation passes, summarise the samples, and upload them.

	    Prints a banner with GPU details, calls gen_python_models and
	    gen_gguf_models, lists every .wav in OUT with its duration and size, and
	    uploads each one to the Swagcrew/fish-speech-s2-quantized model repo under
	    samples/. Any exception raised during the upload phase is printed rather
	    than propagated.
	    """
	    rule = "=" * 60

	    print("=== Fish Speech S2 Pro - Full Quantization Comparison ===")
	    gpu_name = torch.cuda.get_device_name(0)
	    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
	    print(f"GPU: {gpu_name}, VRAM: {vram_gb:.1f}GB")
	    print(f"Text: {GEN_TEXT}")
	    print(f"Ref: {REF_AUDIO}")

	    gen_python_models()
	    gen_gguf_models()

	    # Upload all samples
	    print("\n" + rule)
	    print(" UPLOADING ALL SAMPLES")
	    print(rule)

	    import soundfile as sf

	    # (file name, duration in seconds, size in KB) for every generated sample
	    summary = []
	    for wav_name in sorted(os.listdir(OUT)):
	        if not wav_name.endswith(".wav"):
	            continue
	        wav_path = os.path.join(OUT, wav_name)
	        samples, rate = sf.read(wav_path)
	        seconds = len(samples) / rate
	        size_kb = os.path.getsize(wav_path) / 1024
	        summary.append((wav_name, seconds, size_kb))

	    for wav_name, seconds, size_kb in summary:
	        print(f" {wav_name}: {seconds:.1f}s, {size_kb:.0f}KB")

	    try:
	        from huggingface_hub import HfApi
	        api = HfApi()
	        repo = "Swagcrew/fish-speech-s2-quantized"
	        wav_names = [n for n in sorted(os.listdir(OUT)) if n.endswith(".wav")]
	        for wav_name in wav_names:
	            api.upload_file(
	                path_or_fileobj=os.path.join(OUT, wav_name),
	                path_in_repo=f"samples/{wav_name}",
	                repo_id=repo,
	                repo_type="model",
	            )
	            print(f" Uploaded samples/{wav_name}")
	        print(f"\n 🔗 https://huggingface.co/{repo}/tree/main/samples")
	    except Exception as e:
	        # The first failure stops the remaining uploads; it is reported, not raised.
	        print(f" Upload error: {e}")

	    print("\nDONE!")

	# Run the full pipeline only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()