Update app.py
app.py CHANGED
@@ -28,6 +28,11 @@ def get_or_load_model():
         raise
     return MODEL

+try:
+    get_or_load_model()
+except Exception as e:
+    print(f"CRITICAL startup load failed: {e}")
+

 # ---------------------------------------
 # UTILITIES
@@ -42,46 +47,15 @@ def set_seed(seed: int):
     np.random.seed(seed)


-# --- SMART CHUNKING
+# --- SMART CHUNKING ---
 def smart_chunk_text(text: str, chunk_size: int):
-    """
-    Improved chunker:
-    - Supports newline as a break
-    - Splits by natural sentence boundaries
-    - Falls back to word-level splitting for long sentences
-    """
-
-    # Split on sentence punctuation or newlines
-    sentences = re.split(r"(?<=[\.\!\?…;])\s+|\n+", text)
+    sentences = re.split(r"(?<=[\.\!\?…;])\s+", text)

     chunks = []
     current = ""

     for sentence in sentences:
-
-        if not sentence:
-            continue
-
-        # If this sentence is too long → fall back to word-level splitting
-        if len(sentence) > chunk_size:
-            if current:
-                chunks.append(current.strip())
-                current = ""
-
-            words = sentence.split(" ")
-            tmp = ""
-            for w in words:
-                if len(tmp) + len(w) + 1 > chunk_size:
-                    chunks.append(tmp.strip())
-                    tmp = w + " "
-                else:
-                    tmp += w + " "
-            if tmp:
-                chunks.append(tmp.strip())
-            continue
-
-        # Merge sentences into the current chunk
-        if len(current) + len(sentence) + 1 > chunk_size:
+        if len(current) + len(sentence) > chunk_size:
             if current:
                 chunks.append(current.strip())
             current = sentence + " "
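For reference, a minimal sketch of how the sentence-splitting regex above behaves on sample input (the sample text and printed output are illustrative, not from the Space):

    import re

    # Illustrative only: the lookbehind splits on whitespace that follows
    # sentence-ending punctuation, keeping the punctuation with each sentence.
    text = "Hello world. This is a test! Done."
    print(re.split(r"(?<=[\.\!\?…;])\s+", text))
    # ['Hello world.', 'This is a test!', 'Done.']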
@@ -94,24 +68,10 @@ def smart_chunk_text(text: str, chunk_size: int):
     return chunks


-
-def concat_audio(chunks, sample_rate: int, silence_ms: int = 150):
-    """Concatenate the audio segments and insert 150 ms of silence for natural pauses."""
+def concat_audio(chunks):
     if not chunks:
         return None
-
-    if silence_ms <= 0:
-        return np.concatenate(chunks, axis=-1)
-
-    silence = np.zeros(int(sample_rate * silence_ms / 1000), dtype=chunks[0].dtype)
-
-    segs = []
-    for i, c in enumerate(chunks):
-        if i > 0:
-            segs.append(silence)
-        segs.append(c)
-
-    return np.concatenate(segs, axis=-1)
+    return np.concatenate(chunks, axis=-1)


 # ---------------------------------------
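A minimal sketch of what the simplified concat_audio above computes, joining per-chunk waveforms along the last axis with no inserted silence (the sample rate and array contents are illustrative):

    import numpy as np

    sr = 24000                                # hypothetical sample rate
    a = np.zeros(sr, dtype=np.float32)        # ~1 s of audio
    b = np.ones(sr // 2, dtype=np.float32)    # ~0.5 s of audio
    print(np.concatenate([a, b], axis=-1).shape)  # (36000,)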
@@ -136,12 +96,13 @@ def generate_tts_audio(
         raise RuntimeError("TTS model is not loaded.")

     # -------------------------
-    #
+    # SEED HANDLING
     # -------------------------
     if seed_num_input == 0:
        used_seed = random.randint(1, 2**31 - 1)
     else:
        used_seed = int(seed_num_input)
+
     print(f"Using seed: {used_seed}")
     set_seed(used_seed)

@@ -157,7 +118,7 @@ def generate_tts_audio(
         generate_kwargs["audio_prompt_path"] = audio_prompt_path_input

     # -------------------------
-    #
+    # SMART CHUNK PROCESSING
     # -------------------------
     if enable_chunking:
         print(f"Smart chunking enabled — chunk size = {chunk_size_value}")
@@ -166,30 +127,20 @@ def generate_tts_audio(
         text_chunks = [text_input]

     audio_segments = []
-
     for i, chunk in enumerate(text_chunks):
         print(f"Rendering chunk {i+1}/{len(text_chunks)}...")
-
-        # USE NO_GRAD FOR SPEED & LESS MEMORY
-        with torch.no_grad():
-            wav = current_model.generate(chunk, **generate_kwargs)
-
+        wav = current_model.generate(chunk, **generate_kwargs)
         audio_segments.append(wav.squeeze(0).numpy())

-
-    final_audio = concat_audio(
-        audio_segments,
-        sample_rate=current_model.sr,
-        silence_ms=150
-    )
-
+    final_audio = concat_audio(audio_segments)
     print("Audio generation complete.")

+    # FIXED OUTPUT FORMAT (Gradio-compatible)
     return (current_model.sr, final_audio), used_seed


 # ---------------------------------------
-# UI
+# UI
 # ---------------------------------------

 with gr.Blocks() as demo:
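For the return value above, gr.Audio (with its default numpy type) accepts a (sample_rate, waveform) tuple; a minimal standalone sketch, where the sample rate, waveform, and the `sketch` Blocks name are illustrative:

    import numpy as np
    import gradio as gr

    sr = 24000
    wave = np.zeros(sr, dtype=np.float32)  # one second of silence

    with gr.Blocks() as sketch:
        # gr.Audio takes a (sample_rate, numpy_array) tuple as its value
        gr.Audio(value=(sr, wave), label="Output Audio")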
@@ -250,6 +201,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             audio_output = gr.Audio(label="Output Audio")

+    # CONNECT BUTTON
     run_btn.click(
         fn=generate_tts_audio,
         inputs=[