Swagcrew committed on
Commit
696bef2
·
verified ·
1 Parent(s): 60a491a

Upload gen_samples.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. gen_samples.py +117 -24
gen_samples.py CHANGED
@@ -1,5 +1,5 @@
1
  #!/usr/bin/env python3
2
- """Generate voice clone samples using fish-speech CLI."""
3
  import os, sys, json, time, gc, traceback, subprocess
4
  import torch
5
 
@@ -10,41 +10,41 @@ sys.path.insert(0, "/app/fish-speech")
10
  GEN_TEXT = "Every man's life ends the same way. It is only the details of how he lived that distinguish one man from another."
11
  REF_TEXT = "Let me get this straight. You think that your client, one of the wealthiest most powerful men in the world, is secretly a vigilante who spends his nights beating criminals to a pulp with his bare hands. And your plan is to blackmail this person."
12
  OUT = "/tmp/samples"
 
13
  os.makedirs(OUT, exist_ok=True)
14
 
15
- MODELS = [
 
16
  ("baseline_bf16", "fishaudio/s2-pro"),
17
  ("fp8", "drbaph/s2-pro-fp8"),
 
18
  ]
19
 
20
- def main():
21
- print(f"=== Fish Speech Voice Clone Sample Generator ===")
22
- print(f"GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f}GB")
 
23
 
24
- for name, model_id in MODELS:
25
- print(f"\n{'='*60}")
26
- print(f" {name.upper()} ({model_id})")
27
- print(f"{'='*60}")
28
 
29
  local_dir = f"/tmp/models/{name}"
30
  if not os.path.exists(f"{local_dir}/config.json"):
31
- print(f" Downloading {model_id}...")
32
  from huggingface_hub import snapshot_download
33
  snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))
34
 
35
  out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
36
-
37
- # Step 1: Generate semantic tokens using the CLI
38
  semantic_dir = f"{OUT}/{name}_semantic"
39
  os.makedirs(semantic_dir, exist_ok=True)
40
 
41
  cmd = [
42
  sys.executable, "-m", "fish_speech.models.text2semantic.inference",
43
  "--text", f"<|speaker:0|>{GEN_TEXT}",
44
- "--prompt-audio", "/app/reference/morgan_ref.wav",
45
  "--prompt-text", REF_TEXT,
46
  "--checkpoint-path", local_dir,
47
  "--output-dir", semantic_dir,
 
48
  "--num-samples", "1",
49
  "--max-new-tokens", "1024",
50
  "--top-p", "0.7",
@@ -53,27 +53,120 @@ def main():
53
  "--no-iterative-prompt",
54
  "--chunk-length", "0",
55
  "--device", "cuda",
56
- "--output", out_path,
57
  ]
58
 
59
- print(f" Generating semantic tokens...")
60
  env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}
61
  result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
62
 
63
- print(f" CLI stdout (last 500): {result.stdout[-500:]}")
64
- if result.stderr:
65
- print(f" CLI stderr (last 500): {result.stderr[-500:]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  if os.path.exists(out_path):
68
- sz = os.path.getsize(out_path)
69
- print(f" ✅ Saved {out_path} ({sz/1024:.0f}KB)")
 
 
70
  else:
71
- print(f" ❌ Output not found: {out_path}")
72
 
73
- # Upload
 
 
 
 
 
 
 
 
 
 
74
  print(f"\n{'='*60}")
75
- print(f" UPLOADING TO HUB")
76
  print(f"{'='*60}")
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  try:
78
  from huggingface_hub import HfApi
79
  api = HfApi()
@@ -87,7 +180,7 @@ def main():
87
  repo_type="model"
88
  )
89
  print(f" Uploaded samples/{fn}")
90
- print(f"\n https://huggingface.co/{repo}/tree/main/samples")
91
  except Exception as e:
92
  print(f" Upload error: {e}")
93
 
 
1
  #!/usr/bin/env python3
2
+ """Generate voice clone samples from ALL quantized Fish Speech S2 Pro variants."""
3
  import os, sys, json, time, gc, traceback, subprocess
4
  import torch
5
 
 
10
  GEN_TEXT = "Every man's life ends the same way. It is only the details of how he lived that distinguish one man from another."
11
  REF_TEXT = "Let me get this straight. You think that your client, one of the wealthiest most powerful men in the world, is secretly a vigilante who spends his nights beating criminals to a pulp with his bare hands. And your plan is to blackmail this person."
12
  OUT = "/tmp/samples"
13
+ REF_AUDIO = "/app/reference/morgan_ref.wav"
14
  os.makedirs(OUT, exist_ok=True)
15
 
16
+ # === PART 1: Python-based models (bf16, fp8, gptq) ===
17
+ PYTHON_MODELS = [
18
  ("baseline_bf16", "fishaudio/s2-pro"),
19
  ("fp8", "drbaph/s2-pro-fp8"),
20
+ ("gptq_w4a16", "baicai1145/s2-pro-w4a16"),
21
  ]
22
 
23
+ def gen_python_models():
24
+ print("\n" + "="*60)
25
+ print(" PART 1: Python-based models (bf16, fp8, gptq)")
26
+ print("="*60)
27
 
28
+ for name, model_id in PYTHON_MODELS:
29
+ print(f"\n [{name}] ({model_id})")
 
 
30
 
31
  local_dir = f"/tmp/models/{name}"
32
  if not os.path.exists(f"{local_dir}/config.json"):
 
33
  from huggingface_hub import snapshot_download
34
  snapshot_download(model_id, local_dir=local_dir, token=os.environ.get("HF_TOKEN"))
35
 
36
  out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
 
 
37
  semantic_dir = f"{OUT}/{name}_semantic"
38
  os.makedirs(semantic_dir, exist_ok=True)
39
 
40
  cmd = [
41
  sys.executable, "-m", "fish_speech.models.text2semantic.inference",
42
  "--text", f"<|speaker:0|>{GEN_TEXT}",
43
+ "--prompt-audio", REF_AUDIO,
44
  "--prompt-text", REF_TEXT,
45
  "--checkpoint-path", local_dir,
46
  "--output-dir", semantic_dir,
47
+ "--output", out_path,
48
  "--num-samples", "1",
49
  "--max-new-tokens", "1024",
50
  "--top-p", "0.7",
 
53
  "--no-iterative-prompt",
54
  "--chunk-length", "0",
55
  "--device", "cuda",
 
56
  ]
57
 
 
58
  env = {**os.environ, "PYTHONPATH": "/app/fish-speech"}
59
  result = subprocess.run(cmd, capture_output=True, text=True, timeout=600, env=env)
60
 
61
+ if os.path.exists(out_path):
62
+ import soundfile as sf
63
+ data, sr = sf.read(out_path)
64
+ dur = len(data) / sr
65
+ print(f" ✅ {out_path} ({dur:.1f}s)")
66
+ else:
67
+ print(f" ❌ Failed: {result.stderr[-200:]}")
68
+
69
+ # === PART 2: GGUF models via s2.cpp ===
70
+ GGUF_MODELS = [
71
+ ("gguf_q8_0", "s2-pro-q8_0.gguf"),
72
+ ("gguf_q6_k", "s2-pro-q6_k.gguf"),
73
+ ("gguf_q5_k_m", "s2-pro-q5_k_m.gguf"),
74
+ ("gguf_q4_k_m", "s2-pro-q4_k_m.gguf"),
75
+ ("gguf_q3_k", "s2-pro-q3_k.gguf"),
76
+ ("gguf_q2_k", "s2-pro-q2_k.gguf"),
77
+ ]
78
+
79
+ def build_s2cpp():
80
+ """Build s2.cpp with CUDA support."""
81
+ print("\n Building s2.cpp with CUDA...")
82
+ s2dir = "/tmp/s2.cpp"
83
+ if not os.path.exists(f"{s2dir}/build/s2"):
84
+ subprocess.run(["git", "clone", "--recurse-submodules",
85
+ "https://github.com/rodrigomatta/s2.cpp.git", s2dir],
86
+ capture_output=True, timeout=120)
87
+ subprocess.run(["cmake", "-B", "build", "-DCMAKE_BUILD_TYPE=Release", "-DS2_CUDA=ON"],
88
+ cwd=s2dir, capture_output=True, timeout=60)
89
+ subprocess.run(["cmake", "--build", "build", "--parallel"],
90
+ cwd=s2dir, capture_output=True, timeout=300)
91
+
92
+ if os.path.exists(f"{s2dir}/build/s2"):
93
+ print(" ✅ s2.cpp built")
94
+ return f"{s2dir}/build/s2"
95
+ return None
96
+
97
+ def gen_gguf_models():
98
+ print("\n" + "="*60)
99
+ print(" PART 2: GGUF models via s2.cpp")
100
+ print("="*60)
101
+
102
+ s2bin = build_s2cpp()
103
+ if not s2bin:
104
+ print(" ❌ Failed to build s2.cpp")
105
+ return
106
+
107
+ # Download GGUF models
108
+ from huggingface_hub import hf_hub_download
109
+ gguf_dir = "/tmp/gguf_models"
110
+ os.makedirs(gguf_dir, exist_ok=True)
111
+
112
+ # Download tokenizer
113
+ tok_path = hf_hub_download("rodrigomt/s2-pro-gguf", "tokenizer.json", local_dir=gguf_dir)
114
+
115
+ for name, gguf_file in GGUF_MODELS:
116
+ print(f"\n [{name}] ({gguf_file})")
117
+
118
+ # Download model
119
+ model_path = hf_hub_download("rodrigomt/s2-pro-gguf", gguf_file, local_dir=gguf_dir)
120
+ out_path = f"{OUT}/fish_{name}_morgan_clone.wav"
121
+
122
+ cmd = [
123
+ s2bin,
124
+ "-m", model_path,
125
+ "-t", tok_path,
126
+ "-pa", REF_AUDIO,
127
+ "-pt", REF_TEXT,
128
+ "-text", GEN_TEXT,
129
+ "-c", "0", # CUDA device 0
130
+ "-o", out_path,
131
+ ]
132
+
133
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
134
 
135
  if os.path.exists(out_path):
136
+ import soundfile as sf
137
+ data, sr = sf.read(out_path)
138
+ dur = len(data) / sr
139
+ print(f" ✅ {out_path} ({dur:.1f}s)")
140
  else:
141
+ print(f" ❌ Failed: {result.stderr[-200:]}")
142
 
143
+ # === MAIN ===
144
+ def main():
145
+ print(f"=== Fish Speech S2 Pro - Full Quantization Comparison ===")
146
+ print(f"GPU: {torch.cuda.get_device_name(0)}, VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f}GB")
147
+ print(f"Text: {GEN_TEXT}")
148
+ print(f"Ref: {REF_AUDIO}")
149
+
150
+ gen_python_models()
151
+ gen_gguf_models()
152
+
153
+ # Upload all samples
154
  print(f"\n{'='*60}")
155
+ print(f" UPLOADING ALL SAMPLES")
156
  print(f"{'='*60}")
157
+
158
+ import soundfile as sf
159
+ results = []
160
+ for fn in sorted(os.listdir(OUT)):
161
+ if fn.endswith(".wav"):
162
+ fpath = os.path.join(OUT, fn)
163
+ data, sr = sf.read(fpath)
164
+ dur = len(data) / sr
165
+ results.append((fn, dur, os.path.getsize(fpath)/1024))
166
+
167
+ for fn, dur, sz in results:
168
+ print(f" {fn}: {dur:.1f}s, {sz:.0f}KB")
169
+
170
  try:
171
  from huggingface_hub import HfApi
172
  api = HfApi()
 
180
  repo_type="model"
181
  )
182
  print(f" Uploaded samples/{fn}")
183
+ print(f"\n 🔗 https://huggingface.co/{repo}/tree/main/samples")
184
  except Exception as e:
185
  print(f" Upload error: {e}")
186