rahul7star committed on
Commit
5e3a18d
·
verified ·
1 Parent(s): e3e9cd7

hindi support test

Browse files
Files changed (1) hide show
  1. app_qwen_tts_fast.py +36 -38
app_qwen_tts_fast.py CHANGED
@@ -16,9 +16,8 @@ MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
16
  DOC_FILE = "general.md"
17
  TTS_API_URL = os.getenv(
18
  "TTS_API_URL",
19
- "ETS"
20
  )
21
- print(TTS_API_URL)
22
  MAX_NEW_TOKENS = 128
23
  TOP_K = 3
24
 
@@ -30,6 +29,9 @@ SESSION = requests.Session()
30
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
31
  DOC_PATH = os.path.join(BASE_DIR, DOC_FILE)
32
 
 
 
 
33
  with open(DOC_PATH, "r", encoding="utf-8", errors="ignore") as f:
34
  DOC_TEXT = f.read()
35
 
@@ -46,25 +48,23 @@ def chunk_text(text, chunk_size=300, overlap=50):
46
 
47
  DOC_CHUNKS = chunk_text(DOC_TEXT)
48
 
49
- embedder = SentenceTransformer("all-MiniLM-L6-v2")
50
- DOC_EMBEDS = embedder.encode(
51
- DOC_CHUNKS, normalize_embeddings=True, batch_size=32
52
- )
53
 
54
  # =====================================================
55
- # LOAD QWEN (FAST SETTINGS)
56
  # =====================================================
57
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
58
  model = AutoModelForCausalLM.from_pretrained(
59
  MODEL_ID,
60
- device_map="auto",
61
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
62
  trust_remote_code=True
63
  )
64
  model.eval()
65
 
66
  # =====================================================
67
- # RETRIEVAL
68
  # =====================================================
69
  @lru_cache(maxsize=256)
70
  def retrieve_context(question: str):
@@ -74,7 +74,7 @@ def retrieve_context(question: str):
74
  return "\n\n".join(DOC_CHUNKS[i] for i in top_ids)
75
 
76
  # =====================================================
77
- # QWEN ANSWER (FAST)
78
  # =====================================================
79
  def answer_question(question: str) -> str:
80
  context = retrieve_context(question)
@@ -100,7 +100,7 @@ def answer_question(question: str) -> str:
100
  messages, tokenize=False, add_generation_prompt=True
101
  )
102
 
103
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
104
 
105
  with torch.no_grad():
106
  output = model.generate(
@@ -114,20 +114,19 @@ def answer_question(question: str) -> str:
114
  return decoded.split("\n")[-1].strip()
115
 
116
  # =====================================================
117
- # TTS (FAST + SAFE)
118
  # =====================================================
119
  @lru_cache(maxsize=128)
120
- def generate_audio(text: str) -> str:
121
  payload = {
122
  "text": text,
123
- "language_id": "en",
124
  "mode": "Speak 🗣️"
125
  }
126
 
127
  r = SESSION.post(TTS_API_URL, json=payload, timeout=None)
128
  r.raise_for_status()
129
 
130
- # Unique output path
131
  wav_path = f"/tmp/tts_{uuid.uuid4().hex}.wav"
132
 
133
  # Case 1: raw audio
@@ -138,34 +137,33 @@ def generate_audio(text: str) -> str:
138
 
139
  # Case 2: JSON base64
140
  data = r.json()
141
- audio_b64 = (
142
- data.get("audio")
143
- or data.get("audio_base64")
144
- or data.get("wav")
145
- )
146
-
147
  if not audio_b64:
148
- raise RuntimeError(f"TTS API returned no audio field: {data}")
149
 
150
  audio_bytes = base64.b64decode(audio_b64)
151
-
152
  with open(wav_path, "wb") as f:
153
  f.write(audio_bytes)
154
 
155
  if os.path.getsize(wav_path) < 1000:
156
- raise RuntimeError("Generated audio file is too small")
157
 
158
  return wav_path
159
 
160
  # =====================================================
161
  # MAIN PIPELINE
162
  # =====================================================
163
- def run_pipeline(question):
164
  if not question.strip():
165
  return "", None
166
 
167
  answer = answer_question(question)
168
- audio_path = generate_audio(answer)
 
 
 
 
 
169
 
170
  return f"**Bot:** {answer}", audio_path
171
 
@@ -173,31 +171,31 @@ def run_pipeline(question):
173
  # UI
174
  # =====================================================
175
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
176
-
177
 
178
  with gr.Row():
179
- with gr.Column():
180
  user_input = gr.Textbox(
181
  label="Your Question",
182
  placeholder="Who is CEO of OhamLab?",
183
  lines=3
184
  )
 
 
 
 
 
185
  ask_btn = gr.Button("Ask")
186
 
187
- with gr.Column():
188
  answer_text = gr.Markdown()
189
  answer_audio = gr.Audio(type="filepath")
190
 
191
  ask_btn.click(
192
  fn=run_pipeline,
193
- inputs=user_input,
194
  outputs=[answer_text, answer_audio]
195
  )
196
 
197
- demo.queue() # enable long-running jobs (5 min audio OK)
198
-
199
- demo.launch(
200
- server_name="0.0.0.0",
201
- server_port=7860,
202
- share=False
203
- )
 
16
  DOC_FILE = "general.md"
17
  TTS_API_URL = os.getenv(
18
  "TTS_API_URL",
19
+ "https://rahul7star-Chatterbox-Multilingual-TTS-API.hf.space/tts"
20
  )
 
21
  MAX_NEW_TOKENS = 128
22
  TOP_K = 3
23
 
 
29
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
30
  DOC_PATH = os.path.join(BASE_DIR, DOC_FILE)
31
 
32
+ if not os.path.exists(DOC_PATH):
33
+ raise RuntimeError(f"{DOC_FILE} not found")
34
+
35
  with open(DOC_PATH, "r", encoding="utf-8", errors="ignore") as f:
36
  DOC_TEXT = f.read()
37
 
 
48
 
49
  DOC_CHUNKS = chunk_text(DOC_TEXT)
50
 
51
+ embedder = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
52
+ DOC_EMBEDS = embedder.encode(DOC_CHUNKS, normalize_embeddings=True, batch_size=32)
 
 
53
 
54
  # =====================================================
55
+ # LOAD QWEN MODEL (CPU only)
56
  # =====================================================
57
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
58
  model = AutoModelForCausalLM.from_pretrained(
59
  MODEL_ID,
60
+ device_map="cpu", # strictly CPU
61
+ torch_dtype=torch.float32,
62
  trust_remote_code=True
63
  )
64
  model.eval()
65
 
66
  # =====================================================
67
+ # RETRIEVAL WITH CACHE
68
  # =====================================================
69
  @lru_cache(maxsize=256)
70
  def retrieve_context(question: str):
 
74
  return "\n\n".join(DOC_CHUNKS[i] for i in top_ids)
75
 
76
  # =====================================================
77
+ # QWEN ANSWER (CPU optimized)
78
  # =====================================================
79
  def answer_question(question: str) -> str:
80
  context = retrieve_context(question)
 
100
  messages, tokenize=False, add_generation_prompt=True
101
  )
102
 
103
+ inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
104
 
105
  with torch.no_grad():
106
  output = model.generate(
 
114
  return decoded.split("\n")[-1].strip()
115
 
116
  # =====================================================
117
+ # TTS (CPU safe, flexible language)
118
  # =====================================================
119
  @lru_cache(maxsize=128)
120
+ def generate_audio(text: str, language_id: str = "en") -> str:
121
  payload = {
122
  "text": text,
123
+ "language_id": language_id,
124
  "mode": "Speak 🗣️"
125
  }
126
 
127
  r = SESSION.post(TTS_API_URL, json=payload, timeout=None)
128
  r.raise_for_status()
129
 
 
130
  wav_path = f"/tmp/tts_{uuid.uuid4().hex}.wav"
131
 
132
  # Case 1: raw audio
 
137
 
138
  # Case 2: JSON base64
139
  data = r.json()
140
+ audio_b64 = data.get("audio") or data.get("audio_base64") or data.get("wav")
 
 
 
 
 
141
  if not audio_b64:
142
+ raise RuntimeError(f"TTS API returned no audio: {data}")
143
 
144
  audio_bytes = base64.b64decode(audio_b64)
 
145
  with open(wav_path, "wb") as f:
146
  f.write(audio_bytes)
147
 
148
  if os.path.getsize(wav_path) < 1000:
149
+ raise RuntimeError("Generated audio file too small")
150
 
151
  return wav_path
152
 
153
  # =====================================================
154
  # MAIN PIPELINE
155
  # =====================================================
156
+ def run_pipeline(question: str, language_id: str):
157
  if not question.strip():
158
  return "", None
159
 
160
  answer = answer_question(question)
161
+
162
+ try:
163
+ audio_path = generate_audio(answer, language_id)
164
+ except Exception as e:
165
+ print("TTS generation failed:", e)
166
+ audio_path = None
167
 
168
  return f"**Bot:** {answer}", audio_path
169
 
 
171
  # UI
172
  # =====================================================
173
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
174
+ gr.Markdown("# 📄 Qwen CPU Assistant + TTS")
175
 
176
  with gr.Row():
177
+ with gr.Column(scale=1):
178
  user_input = gr.Textbox(
179
  label="Your Question",
180
  placeholder="Who is CEO of OhamLab?",
181
  lines=3
182
  )
183
+ language_dropdown = gr.Dropdown(
184
+ label="TTS Language",
185
+ choices=["en", "hi"],
186
+ value="en"
187
+ )
188
  ask_btn = gr.Button("Ask")
189
 
190
+ with gr.Column(scale=1):
191
  answer_text = gr.Markdown()
192
  answer_audio = gr.Audio(type="filepath")
193
 
194
  ask_btn.click(
195
  fn=run_pipeline,
196
+ inputs=[user_input, language_dropdown],
197
  outputs=[answer_text, answer_audio]
198
  )
199
 
200
+ demo.queue() # long-running jobs OK (up to 5 min audio)
201
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=False)