Spaces:

Alstears
/

chatterbox-id-clone-api

Sleeping

App Files Community

Alstears committed 15 days ago

Commit

0da44d7

verified ·

1 Parent(s): 6bc0c19

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -22

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 import os
-os.environ["CUDA_VISIBLE_DEVICES"] = ""  # paksa CPU-only
 import re
 import inspect
 import tempfile
 import traceback
@@ -13,9 +14,9 @@ import torchaudio as ta
 import gradio as gr
 # =========================
-# HARD PATCH CPU DESERIALIZE
 # =========================
-torch.cuda.is_available = lambda: False
 _original_torch_load = torch.load
 def _torch_load_cpu(*args, **kwargs):
@@ -30,6 +31,7 @@ if hasattr(torch.jit, "load"):
         return _original_jit_load(*args, **kwargs)
     torch.jit.load = _jit_load_cpu
 # =========================
 # MODEL IMPORT
 # =========================
@@ -60,7 +62,6 @@ def get_model():
                 t3_state = load_file(ckpt_path, device="cpu")
                 m.t3.load_state_dict(t3_state)
-                # ChatterboxTTS tidak punya .to(), jadi jangan pakai m.to("cpu")
                 if hasattr(m, "eval"):
                     m.eval()
@@ -72,6 +73,13 @@ def get_model():
 def _download_wav(url: str) -> str:
     r = requests.get(url, timeout=90)
     r.raise_for_status()
     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
     tmp.write(r.content)
     tmp.close()
@@ -79,16 +87,23 @@ def _download_wav(url: str) -> str:
 def _resolve_audio_input(audio_file, audio_url: str):
-    # gr.Audio(type="filepath") biasanya return string path
     if isinstance(audio_file, str) and audio_file.strip():
         return audio_file
-    # fallback kalau format dict
     if isinstance(audio_file, dict):
         p = audio_file.get("path")
         if p:
             return p
     if audio_url and audio_url.strip():
         return _download_wav(audio_url.strip())
@@ -99,13 +114,75 @@ def _prepare_text_exact(text: str) -> str:
     t = (text or "").strip()
     if not t:
         raise gr.Error("Text prompt tidak boleh kosong.")
-    # tambah tanda akhir agar model tidak lanjut ngawur
     if not re.search(r"[.!?…]$", t):
         t += "."
     return t
 def _generate_with_safe_kwargs(model, text: str, prompt_path: str):
     sig = inspect.signature(model.generate)
     params = sig.parameters
@@ -113,7 +190,7 @@ def _generate_with_safe_kwargs(model, text: str, prompt_path: str):
     if "audio_prompt_path" in params:
         kwargs["audio_prompt_path"] = prompt_path
-    # Set parameter jika didukung versi chatterbox yang terpasang
     if "temperature" in params:
         kwargs["temperature"] = 0.05
     if "top_p" in params:
@@ -123,40 +200,57 @@ def _generate_with_safe_kwargs(model, text: str, prompt_path: str):
     if "cfg_weight" in params:
         kwargs["cfg_weight"] = 0.3
-    # Coba gaya pemanggilan paling umum
     try:
         return model.generate(text, **kwargs)
     except TypeError:
-        # fallback: beberapa versi pakai named argument
         if "text" in params:
             kwargs["text"] = text
             return model.generate(**kwargs)
-        # fallback paling basic
         return model.generate(text)
 def clone_voice(text: str, audio_file, audio_url: str):
     try:
-        text = _prepare_text_exact(text)
         prompt_path = _resolve_audio_input(audio_file, audio_url)
         if not prompt_path:
             raise gr.Error("Upload WAV atau isi Audio URL WAV.")
         model = get_model()
-        # bikin output lebih konsisten
         torch.manual_seed(42)
         with torch.no_grad():
-            wav = _generate_with_safe_kwargs(model, text, prompt_path)
-        if wav.dim() == 1:
-            wav = wav.unsqueeze(0)
-        sr = getattr(model, "sr", 24000)
         out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
-        ta.save(out_path, wav.cpu(), sr)
         return out_path
     except Exception as e:
@@ -167,17 +261,19 @@ def clone_voice(text: str, audio_file, audio_url: str):
 with gr.Blocks(title="Chatterbox Indonesian Voice Cloning (CPU)") as demo:
     gr.Markdown("## Chatterbox-TTS Indonesian (CPU)")
-    gr.Markdown("Masukkan teks + upload WAV (atau URL WAV)")
     text_in = gr.Textbox(
         label="Text Prompt",
-        lines=4,
-        placeholder="Contoh: Apa kabar."
     )
     wav_in = gr.Audio(
         label="Upload WAV Prompt",
         type="filepath"
     )
     url_in = gr.Textbox(
         label="Audio URL WAV (opsional)",
         placeholder="https://example.com/input.wav"

 import os
+os.environ["CUDA_VISIBLE_DEVICES"] = ""  # force CPU only
 import re
+import io
 import inspect
 import tempfile
 import traceback
 import gradio as gr
 # =========================
+# HARD PATCH: FORCE CPU DESERIALIZATION
 # =========================
+torch.cuda.is_available = lambda: False  # noqa
 _original_torch_load = torch.load
 def _torch_load_cpu(*args, **kwargs):
         return _original_jit_load(*args, **kwargs)
     torch.jit.load = _jit_load_cpu
 # =========================
 # MODEL IMPORT
 # =========================
                 t3_state = load_file(ckpt_path, device="cpu")
                 m.t3.load_state_dict(t3_state)
                 if hasattr(m, "eval"):
                     m.eval()
 def _download_wav(url: str) -> str:
     r = requests.get(url, timeout=90)
     r.raise_for_status()
+    # Optional: basic content-type check
+    ctype = (r.headers.get("content-type") or "").lower()
+    if "audio" not in ctype and not url.lower().endswith(".wav"):
+        # tetap lanjut, karena beberapa server salah header
+        pass
     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
     tmp.write(r.content)
     tmp.close()
 def _resolve_audio_input(audio_file, audio_url: str):
+    """
+    Support beberapa format dari gradio:
+    - str path
+    - dict {"path": "..."}
+    - None
+    """
+    # 1) filepath string
     if isinstance(audio_file, str) and audio_file.strip():
         return audio_file
+    # 2) dict format
     if isinstance(audio_file, dict):
         p = audio_file.get("path")
         if p:
             return p
+    # 3) URL fallback
     if audio_url and audio_url.strip():
         return _download_wav(audio_url.strip())
     t = (text or "").strip()
     if not t:
         raise gr.Error("Text prompt tidak boleh kosong.")
+    # rapikan whitespace
+    t = re.sub(r"\s+", " ", t)
+    # tambahkan tanda akhir agar model tidak lanjut ngawur
     if not re.search(r"[.!?…]$", t):
         t += "."
     return t
+def _split_text_safely(text: str, max_chars: int = 320):
+    """
+    Pecah teks panjang agar tidak truncate di tengah.
+    """
+    text = re.sub(r"\s+", " ", (text or "").strip())
+    if not text:
+        return []
+    # split per kalimat
+    sentences = re.split(r'(?<=[.!?])\s+', text)
+    chunks = []
+    current = ""
+    for s in sentences:
+        s = s.strip()
+        if not s:
+            continue
+        # kalau 1 kalimat terlalu panjang, pecah lagi pakai koma/semicolon
+        parts = [s]
+        if len(s) > max_chars:
+            parts = re.split(r'(?<=[,;:])\s+', s)
+        for p in parts:
+            p = p.strip()
+            if not p:
+                continue
+            # fallback keras: jika part masih sangat panjang, potong manual
+            if len(p) > max_chars:
+                for i in range(0, len(p), max_chars):
+                    piece = p[i:i + max_chars].strip()
+                    if not piece:
+                        continue
+                    if current:
+                        chunks.append(current)
+                        current = ""
+                    chunks.append(piece)
+                continue
+            candidate = f"{current} {p}".strip() if current else p
+            if len(candidate) <= max_chars:
+                current = candidate
+            else:
+                if current:
+                    chunks.append(current)
+                current = p
+    if current:
+        chunks.append(current)
+    return chunks
 def _generate_with_safe_kwargs(model, text: str, prompt_path: str):
+    """
+    Aman terhadap beda versi signature generate().
+    """
     sig = inspect.signature(model.generate)
     params = sig.parameters
     if "audio_prompt_path" in params:
         kwargs["audio_prompt_path"] = prompt_path
+    # parameter opsional (kalau didukung)
     if "temperature" in params:
         kwargs["temperature"] = 0.05
     if "top_p" in params:
     if "cfg_weight" in params:
         kwargs["cfg_weight"] = 0.3
+    # coba positional text dulu
     try:
         return model.generate(text, **kwargs)
     except TypeError:
+        # fallback named text
         if "text" in params:
             kwargs["text"] = text
             return model.generate(**kwargs)
+        # fallback terakhir
         return model.generate(text)
 def clone_voice(text: str, audio_file, audio_url: str):
     try:
         prompt_path = _resolve_audio_input(audio_file, audio_url)
         if not prompt_path:
             raise gr.Error("Upload WAV atau isi Audio URL WAV.")
+        # split dulu supaya tidak truncation
+        chunks = _split_text_safely(text, max_chars=320)
+        if not chunks:
+            raise gr.Error("Text prompt tidak boleh kosong.")
         model = get_model()
+        sr = getattr(model, "sr", 24000)
+        # deterministik ringan
         torch.manual_seed(42)
+        wav_parts = []
+        pause = torch.zeros(1, int(sr * 0.18))  # jeda antar chunk ~180ms
         with torch.no_grad():
+            for ch in chunks:
+                ch = _prepare_text_exact(ch)
+                wav = _generate_with_safe_kwargs(model, ch, prompt_path)
+                if wav.dim() == 1:
+                    wav = wav.unsqueeze(0)
+                wav_parts.append(wav.cpu())
+                wav_parts.append(pause)
+        # buang pause terakhir
+        if wav_parts:
+            wav_parts = wav_parts[:-1]
+        full_wav = torch.cat(wav_parts, dim=1)
         out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
+        ta.save(out_path, full_wav, sr)
         return out_path
     except Exception as e:
 with gr.Blocks(title="Chatterbox Indonesian Voice Cloning (CPU)") as demo:
     gr.Markdown("## Chatterbox-TTS Indonesian (CPU)")
+    gr.Markdown("Masukkan teks + upload WAV (atau URL WAV). Teks panjang akan otomatis dipecah agar tidak kepotong.")
     text_in = gr.Textbox(
         label="Text Prompt",
+        lines=8,
+        placeholder="Contoh: Apa kabar. Hari ini kita belajar data mining."
     )
     wav_in = gr.Audio(
         label="Upload WAV Prompt",
         type="filepath"
     )
     url_in = gr.Textbox(
         label="Audio URL WAV (opsional)",
         placeholder="https://example.com/input.wav"