riyans98 committed on
Commit
7836e24
·
verified ·
1 Parent(s): fa106ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -34
app.py CHANGED
@@ -1,18 +1,9 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- from transformers import VitsModel, AutoTokenizer
4
- import torch
5
- import numpy as np
6
- import io
7
- import soundfile as sf
8
-
9
- # Load TTS model once at startup for efficiency
10
- tts_model = VitsModel.from_pretrained("facebook/mms-tts-hne")
11
- tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-hne")
12
 
13
  def respond(
14
  message,
15
- history: list,
16
  system_message,
17
  max_tokens,
18
  temperature,
@@ -24,43 +15,31 @@ def respond(
24
  """
25
  client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
26
  messages = [{"role": "system", "content": system_message}]
27
- # Convert history to OpenAI format (assuming history is list of [user_str, bot_str] pairs)
28
- for user_msg, bot_msg in history:
29
- if user_msg:
30
- messages.append({"role": "user", "content": user_msg})
31
- if bot_msg:
32
- messages.append({"role": "assistant", "content": bot_msg})
33
  messages.append({"role": "user", "content": message})
34
 
 
35
  # Stream the text response
36
  response = ""
37
- for chunk in client.chat_completion(
 
 
38
  messages,
39
  max_tokens=max_tokens,
40
  stream=True,
41
  temperature=temperature,
42
  top_p=top_p,
43
  ):
44
- choices = chunk.choices
45
  token = ""
46
  if len(choices) and choices[0].delta.content:
47
  token = choices[0].delta.content
48
- response += token
49
- # Yield partial text update to the chatbot (bot_msg is str for now)
50
- yield history + [[message, response]], None # None for audio placeholder
51
-
52
- # After full text is generated, create TTS audio
53
- inputs = tts_tokenizer(response, return_tensors="pt")
54
- with torch.no_grad():
55
- waveform = tts_model(**inputs).waveform.squeeze().cpu().numpy()
56
-
57
- # Convert waveform to bytes (WAV format) for Gradio Audio
58
- buffer = io.BytesIO()
59
- sf.write(buffer, waveform, tts_model.config.sampling_rate, format='wav')
60
- audio_bytes = buffer.getvalue()
61
 
62
- # Yield final update: chatbot with full text, and audio bytes
63
- yield history + [[message, response]], audio_bytes
 
64
 
65
  with gr.Blocks() as demo:
66
  with gr.Row():
@@ -68,7 +47,7 @@ with gr.Blocks() as demo:
68
  gr.LoginButton(label="Login with Hugging Face")
69
  with gr.Column(scale=4):
70
  chatbot = gr.Chatbot(height=500)
71
- audio_output = gr.Audio(label="Play TTS Audio (Chhattisgarhi)", interactive=False)
72
  msg = gr.Textbox(label="Your message")
73
  submit_btn = gr.Button("Send")
74
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
 
3
 
4
  def respond(
5
  message,
6
+ history: list[dict[str, str]],
7
  system_message,
8
  max_tokens,
9
  temperature,
 
15
  """
16
  client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
17
  messages = [{"role": "system", "content": system_message}]
18
+
19
+ messages.extend(history)
20
+
 
 
 
21
  messages.append({"role": "user", "content": message})
22
 
23
+
24
  # Stream the text response
25
  response = ""
26
+
27
+
28
+ for message in client.chat_completion(
29
  messages,
30
  max_tokens=max_tokens,
31
  stream=True,
32
  temperature=temperature,
33
  top_p=top_p,
34
  ):
35
+ choices = message.choices
36
  token = ""
37
  if len(choices) and choices[0].delta.content:
38
  token = choices[0].delta.content
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ response += token
41
+ yield response
42
+
43
 
44
  with gr.Blocks() as demo:
45
  with gr.Row():
 
47
  gr.LoginButton(label="Login with Hugging Face")
48
  with gr.Column(scale=4):
49
  chatbot = gr.Chatbot(height=500)
50
+
51
  msg = gr.Textbox(label="Your message")
52
  submit_btn = gr.Button("Send")
53