Spaces: Running on Zero

First commit
Browse files
- app.py +191 -164
- requirements.txt +6 -5

app.py CHANGED
@@ -1,168 +1,210 @@
# ===================================================================================
# 1) SETUP & IMPORTS
# ===================================================================================
from __future__ import annotations
import os, base64, struct, textwrap, re
import requests
from typing import List, Tuple, Dict, Generator

# Load .env early (HF_TOKEN / SECRET_TOKEN)
from dotenv import load_dotenv
load_dotenv()

# Make downloads fast & quiet
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
os.environ.setdefault("COQUI_TOS_AGREED", "1")
# Avoid Gradio analytics pandas edge-cases
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")

# HF Spaces / Gradio
try:
    import spaces  # ZeroGPU decorator
except Exception:
    # Allow local runs without the spaces package
    class _SpacesShim:
        def GPU(self, *a, **k):
            def _wrap(fn): return fn
            return _wrap
    spaces = _SpacesShim()

import gradio as gr

# Core ML
import torch
import numpy as np
from huggingface_hub import HfApi, hf_hub_download
from llama_cpp import Llama

# Coqui TTS (XTTS v2)
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
from TTS.utils.manage import ModelManager
from TTS.utils.generic_utils import get_user_data_dir

# Text / audio processing
import nltk, langid, emoji, noisereduce as nr

# Download NLTK data once
nltk.download("punkt", quiet=True)

# ===================================================================================
# 2) GLOBALS & HELPERS
# ===================================================================================
HF_TOKEN = os.getenv("HF_TOKEN")
api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
repo_id = "ruslanmv/ai-story-server"

SECRET_TOKEN = os.getenv("SECRET_TOKEN", "secret")
SENTENCE_SPLIT_LENGTH = 250
LLM_STOP_WORDS = ["</s>", "<|user|>", "/s>"]

# Cached models & latents
tts_model: Xtts | None = None
llm_model: Llama | None = None
voice_latents: Dict[str, Tuple[np.ndarray, np.ndarray]] = {}

ROLES = ["Cloée", "Julian", "Pirate", "Thera"]
default_system_message = (
    "You're a storyteller crafting a short tale for young listeners. Keep sentences short and simple. "
    "Use narrative style only, without lists or complex words. Type numbers as words (e.g., 'ten')."
)
system_message = os.environ.get("SYSTEM_MESSAGE", default_system_message)
ROLE_PROMPTS = {r: system_message for r in ROLES}
ROLE_PROMPTS["Pirate"] = (
    "You are AI Beard, a pirate. Craft your response from his first-person perspective. "
    "Keep answers short, as if in a real conversation. Only provide the words AI Beard would speak."
)

def pcm_to_wav(pcm: bytes, sr: int = 24000, ch: int = 1, bit: int = 16) -> bytes:
    if pcm.startswith(b"RIFF"):  # already WAV
        return pcm
    chunk = 36 + len(pcm)
    hdr = struct.pack(
        "<4sI4s4sIHHIIHH4sI",
        b"RIFF", chunk, b"WAVE", b"fmt ", 16, 1, ch, sr,
        sr * ch * bit // 8, ch * bit // 8, bit, b"data", len(pcm)
    )
    return hdr + pcm
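
# Example (editor's sketch, not part of the commit): struct.pack above emits the
# standard 44-byte RIFF/WAVE header, so for one second of silence at the default
# 24 kHz mono 16-bit settings:
#   wav = pcm_to_wav(b"\x00" * 48000)
#   assert wav[:4] == b"RIFF" and wav[8:12] == b"WAVE" and len(wav) == 44 + 48000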

def split_sentences(text: str, max_len: int) -> List[str]:
    out: List[str] = []
    for sent in nltk.sent_tokenize(text):
        if len(sent) > max_len:
            out.extend(textwrap.wrap(sent, max_len, break_long_words=True))
        else:
            out.append(sent)
    return out
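
# Example (editor's sketch): sentences longer than max_len come back hard-wrapped
# into pieces of at most max_len characters, shorter ones pass through unchanged:
#   split_sentences("Short one. " + "x" * 300, 250)
#   -> ["Short one.", <250 x's>, <50 x's>]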

def format_prompt_zephyr(message: str, history: List[Tuple[str, str | None]], sys_msg: str) -> str:
    prompt = f"<|system|>\n{sys_msg}</s>"
    for u, a in history:
        if a:
            prompt += f"<|user|>\n{u}</s><|assistant|>\n{a}</s>"
    prompt += f"<|user|>\n{message}</s><|assistant|>"
    return prompt
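
# Example (editor's sketch): with one completed turn in history, the helper emits
# the Zephyr chat template the GGUF model expects:
#   format_prompt_zephyr("Another?", [("Tell a tale", "Once upon a time...")], "Be kind.")
#   -> "<|system|>\nBe kind.</s><|user|>\nTell a tale</s><|assistant|>\nOnce upon a time...</s><|user|>\nAnother?</s><|assistant|>"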

# ===================================================================================
# 3) PRE-CACHE (FIRST-RUN DOWNLOADS ONLY)
# ===================================================================================

def _xtts_paths() -> Tuple[str, str, str, str]:
    """
    Returns (model_dir, model_pth, vocab_json, speakers_pth) for XTTS v2.
    Ensures the model is downloaded.
    """
    model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
    ModelManager().download_model(model_name)  # idempotent
    model_dir = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
    return (
        model_dir,
        os.path.join(model_dir, "model.pth"),
        os.path.join(model_dir, "vocab.json"),
        os.path.join(model_dir, "speakers_xtts.pth"),
    )

def precache_assets() -> None:
    """Download all large artifacts so the first inference is fast."""
    # Voices
    print("Pre-caching voice files...")
    base_url = "https://raw.githubusercontent.com/ruslanmv/ai-story-server/main/voices/"
    os.makedirs("voices", exist_ok=True)
    for name in ["cloee-1.wav", "julian-bedtime-style-1.wav", "pirate_by_coqui.wav", "thera-1.wav"]:
        dst = os.path.join("voices", name)
        if not os.path.exists(dst):
            try:
                r = requests.get(base_url + name, timeout=30)
                r.raise_for_status()
                with open(dst, "wb") as f:
                    f.write(r.content)
            except Exception as e:
                print(f"Warning: failed to fetch {name}: {e}")

    # XTTS weights (CPU-safe: just files)
    print("Pre-caching XTTS model files...")
    model_dir, model_pth, vocab_json, speakers_pth = _xtts_paths()
    for p in [model_pth, vocab_json, speakers_pth, os.path.join(model_dir, "config.json")]:
        if not os.path.exists(p):
            print(f"Warning: missing expected XTTS file: {p}")

    # Llama GGUF
    print("Pre-caching LLM (Zephyr GGUF)...")
    try:
        hf_hub_download(
            repo_id="TheBloke/zephyr-7B-beta-GGUF",
            filename="zephyr-7b-beta.Q5_K_M.gguf",
            force_download=False
        )
    except Exception as e:
        print(f"Warning: GGUF download error: {e}")

# Run pre-cache at import time (downloads only; no GPU needed)
precache_assets()
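
# Editor's note (sketch; exact path assumes Linux): after precache_assets() runs,
# the expected layout is
#   voices/{cloee-1.wav, julian-bedtime-style-1.wav, pirate_by_coqui.wav, thera-1.wav}
#   ~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2/
#       {model.pth, vocab.json, speakers_xtts.pth, config.json}
# get_user_data_dir("tts") resolves the per-OS data directory.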

# ===================================================================================
# 4) MODEL LOADERS
# ===================================================================================

def _load_xtts(device: str) -> Xtts:
    print("Loading Coqui XTTS V2 model (first run)...")
    model_dir, model_pth, vocab_json, speakers_pth = _xtts_paths()

    cfg = XttsConfig()
    cfg.load_json(os.path.join(model_dir, "config.json"))

    model = Xtts.init_from_config(cfg)
    # IMPORTANT: pass speaker_file_path to avoid NoneType join inside library
    model.load_checkpoint(
        cfg,
        checkpoint_path=model_pth,
        vocab_path=vocab_json,
        speaker_file_path=speakers_pth,  # <-- fixes TypeError
        eval=True,
        use_deepspeed=False,  # deepspeed not installed
    )
    model.to(device)
    print("XTTS model ready.")
    return model

def _load_llama() -> Llama:
    print("Loading LLM (Zephyr) (first run)...")
    gguf = hf_hub_download(
        repo_id="TheBloke/zephyr-7B-beta-GGUF",
        filename="zephyr-7b-beta.Q5_K_M.gguf"
    )
    # Try GPU offload then CPU
    for n_gpu_layers in (-1, 0):
        try:
            llm = Llama(
                model_path=gguf,
                n_gpu_layers=n_gpu_layers,
                n_ctx=4096,
                n_batch=512,
                verbose=False
            )
            print("LLM loaded with " + ("GPU offload" if n_gpu_layers == -1 else "CPU"))
            return llm
        except Exception as e:
            print(f"LLM init failed (n_gpu_layers={n_gpu_layers}): {e}")
    raise RuntimeError("Failed to initialize Llama.")

def load_models() -> Tuple[Xtts, Llama]:
    global tts_model, llm_model
@@ -173,134 +215,119 @@ def load_models() -> Tuple[Xtts, Llama]:
    llm_model = _load_llama()
    return tts_model, llm_model

# ===================================================================================
# 5) GENERATION
# ===================================================================================

def generate_text_stream(llm: Llama, prompt: str,
                         history: List[Tuple[str, str | None]],
                         sys_msg: str) -> Generator[str, None, None]:
    formatted = format_prompt_zephyr(prompt, history, sys_msg)
    stream = llm(
        formatted,
        temperature=0.7,
        max_tokens=512,
        top_p=0.95,
        stop=LLM_STOP_WORDS,
        stream=True
    )
    for resp in stream:
        ch = resp["choices"][0]["text"]
        # Guard against control tokens & isolated emoji artefacts
        if "<|user|>" in ch or (len(ch) == 1 and emoji.is_emoji(ch)):
            continue
        yield ch
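
# Example (editor's sketch): callers can stream the fragments to a UI or simply
# join them, as generate_story_and_speech does below:
#   story = "".join(generate_text_stream(llm, "Tell me a story", [], system_message))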

def generate_audio_stream(tts: Xtts, text: str, lang: str,
                          latents: Tuple[np.ndarray, np.ndarray]) -> Generator[bytes, None, None]:
    gpt_lat, spk_emb = latents
    try:
        for chunk in tts.inference_stream(
            text=text,
            language=lang,
            gpt_cond_latent=gpt_lat,
            speaker_embedding=spk_emb,
            temperature=0.85,
        ):
            if chunk is not None:
                # XTTS yields float32 samples in [-1, 1]; convert to 16-bit PCM so
                # the int16 noise-reduction and WAV-header steps downstream match
                wav = chunk.detach().cpu().numpy().squeeze()
                yield (np.clip(wav, -1.0, 1.0) * 32767).astype(np.int16).tobytes()
    except RuntimeError as e:
        print(f"TTS inference error: {e}")
        # Soft-restart if GPU went bad and we can talk to the HF API
        if "device-side assert" in str(e) and api:
            try:
                gr.Warning("Critical GPU error. Attempting to restart the Space...")
                api.restart_space(repo_id=repo_id)
            except Exception:
                pass

# ===================================================================================
# 6) ZERO-GPU MAIN FUNCTION
# ===================================================================================

@spaces.GPU(duration=120)
def generate_story_and_speech(secret_token_input: str, input_text: str, chatbot_role: str):
    if secret_token_input != SECRET_TOKEN:
        raise gr.Error("Invalid secret token provided.")
    if not input_text:
        return []

    tts, llm = load_models()

    # Pre-compute & cache voice latents once per session
    global voice_latents
    if not voice_latents:
        for role, fname in [
            ("Cloée", "cloee-1.wav"),
            ("Julian", "julian-bedtime-style-1.wav"),
            ("Pirate", "pirate_by_coqui.wav"),
            ("Thera", "thera-1.wav"),
        ]:
            path = os.path.join("voices", fname)
            voice_latents[role] = tts.get_conditioning_latents(
                audio_path=path, gpt_cond_len=30, max_ref_length=60
            )

    # Generate story
    history = [(input_text, None)]
    story = "".join(generate_text_stream(llm, history[-1][0], history[:-1], ROLE_PROMPTS[chatbot_role])).strip()
    if not story:
        return []

    # Clean & split: separate trailing punctuation with a space before tokenizing
    story = re.sub(r"([^\x00-\x7F]|\w)([.?!]+)", r"\1 \2", story)
    sentences = split_sentences(story, SENTENCE_SPLIT_LENGTH)
    lang = langid.classify(sentences[0])[0] if sentences else "en"

    results = []
    for s in sentences:
        if not any(c.isalnum() for c in s):
            continue

        pcm_chunks = generate_audio_stream(tts, s, lang, voice_latents[chatbot_role])
        pcm = b"".join(ch for ch in pcm_chunks if ch)

        # Best-effort noise reduction
        try:
            arr = np.frombuffer(pcm, dtype=np.int16)
            if arr.size:
                wav_f32 = arr.astype(np.float32) / 32767.0
                denoised = nr.reduce_noise(y=wav_f32, sr=24000)
                pcm = (denoised * 32767).astype(np.int16).tobytes()
        except Exception:
            pass

        b64 = base64.b64encode(pcm_to_wav(pcm)).decode("utf-8")
        results.append({"text": s, "audio": b64})

    return results
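
# The JSON payload is a list of {"text": sentence, "audio": base64-encoded WAV}
# dicts, e.g. [{"text": "Once upon a time ...", "audio": "UklGR..."}]; see the
# client sketch after this file for decoding.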

# ===================================================================================
# 7) UI
# ===================================================================================

# Voice files and model weights were already fetched by precache_assets() at
# import time, so there is nothing left to download here (no-op if files exist).

demo = gr.Interface(
    fn=generate_story_and_speech,
    inputs=[
@@ -311,8 +338,8 @@ demo = gr.Interface(
    outputs=gr.JSON(label="Story and Audio Output"),
    title="AI Storyteller with ZeroGPU",
    description="Enter a prompt to generate a short story with voice narration using on-demand GPU.",
    flagging_mode="never",  # replaces deprecated allow_flagging
)

if __name__ == "__main__":
    demo.queue().launch()  # you can add ssr_mode=False if you prefer
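
A minimal client sketch (editor's example, not part of the commit): a gr.Interface
exposes a default "/predict" endpoint, so the JSON output can be fetched and its
base64 WAV clips decoded like this, assuming the default SECRET_TOKEN "secret"
and a reachable Space:

import base64
from gradio_client import Client  # pip install gradio_client

client = Client("ruslanmv/ai-story-server")
result = client.predict(
    "secret",                            # secret_token_input
    "A dragon who is afraid of fire.",   # input_text
    "Pirate",                            # chatbot_role
    api_name="/predict",
)
for i, item in enumerate(result):
    with open(f"sentence_{i}.wav", "wb") as f:
        f.write(base64.b64decode(item["audio"]))
    print(item["text"])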

requirements.txt CHANGED
@@ -1,4 +1,4 @@
# Core
torch==2.2.2
torchaudio==2.2.2
gradio==5.47.2
@@ -7,15 +7,16 @@ python-dotenv
spaces
requests
numpy
pandas>=2.2.2,<3  # Fixes Gradio analytics OptionError

# TTS
TTS @ git+https://github.com/coqui-ai/TTS@v0.22.0
pydantic==2.5.3

# LLM
llama-cpp-python==0.2.79

# Audio & Text
noisereduce==3.0.3
pydub
langid
@@ -23,6 +24,6 @@ nltk
emoji
ffmpeg-python

# Japanese Text (optional)
mecab-python3==1.0.9
unidic-lite==1.0.8