Update app.py
app.py
CHANGED
@@ -1,44 +1,61 @@
-import argparse
-import urllib.request
-from threading import Thread
-
 import gradio as gr
 import torch
-from transformers import AutoTokenizer,
-import speech_recognition as sr
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import pyttsx3
+import speech_recognition as sr
 from huggingface_hub import InferenceClient
 
-# API Key for
-API_KEY = "
+# API Key for HuggingFace InferenceClient
+API_KEY = "AIzaSyBWBxsPBykuJ6z_kMYlAq9k9u3YU2Uy8Oc"
 
-# Initialize InferenceClient with
+# Initialize the InferenceClient (replace with your model name if necessary)
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=API_KEY)
 
-#
-
+# Hardcoded system message
+system_message = "You are a friendly and helpful chatbot."
 
-# Load
-
-
-
-
-
+# Load model with quantization and auto-device setup for faster loading
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model = AutoModelForCausalLM.from_pretrained(
+    "eurecom-ds/Phi-3-mini-4k-socratic",  # Replace with your model
+    torch_dtype=torch.bfloat16,
+    load_in_4bit=True,  # Enable 4-bit quantization for faster inference
+    device_map="auto",  # Automatically use GPU if available
+)
 
-
+# Tokenizer for the model
+tokenizer = AutoTokenizer.from_pretrained("eurecom-ds/Phi-3-mini-4k-socratic")
 
-
-
-
-
-
-
-
+# Function to handle text responses
+def respond(message, history: list, audio_input=None):
+    if audio_input:
+        message = voice_to_text(audio_input)
+
+    messages = [{"role": "system", "content": system_message}]
+
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
+    messages.append({"role": "user", "content": message})
 
-
-
+    response = ""  # Initialize response
+
+    try:
+        for message_response in client.chat_completion(messages, max_tokens=150, stream=True):  # Reduce max tokens for faster response
+            if 'choices' in message_response and len(message_response['choices']) > 0:
+                delta_content = message_response['choices'][0].get('delta', {}).get('content', '')
+                if delta_content:
+                    response += delta_content
+            else:
+                print("Error: No valid content in response")
+                break
+    except Exception as e:
+        print(f"Error during API request: {e}")
 
-    return
+    return response
 
 
 # Convert voice input (audio) to text
@@ -54,62 +71,57 @@ def voice_to_text(audio):
         text = "Could not request results from Google Speech Recognition service."
     return text
 
-
 # Convert text to speech (voice output)
 def text_to_voice(text):
-
-    engine.save_to_file(text,
+    engine = pyttsx3.init()
+    engine.save_to_file(text, 'response.mp3')
     engine.runAndWait()
-    return
+    return 'response.mp3'
 
 
-#
-def respond(message, history, audio_input=None):
-    if audio_input:
-        message = voice_to_text(audio_input)  # Convert audio input to text if available
-
-    # Prepare the prompt for the Socratic model
-    user_query = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in history[:-1])
-    last_query: str = history[-1][0]
-    user_query += f"Student: {last_query}"
-
-    content = f"Teacher: {user_query}"
-
-    # Get the model's response
-    model, tokenizer, streamer, device = load_model()
-
-    formatted = tokenizer.apply_chat_template([{"role": "user", "content": content}], tokenize=False, add_generation_prompt=True)
-    encoded_inputs = tokenizer([formatted], return_tensors="pt").to(device)
-
-    thread = Thread(target=model.generate, kwargs=dict(encoded_inputs, max_new_tokens=250, streamer=streamer))
-    thread.start()
-
-    response = ""
-    for word in streamer:
-        response += word
-
-    # Convert response text to speech (audio output)
-    audio_output = text_to_voice(response)
-
-    return response, audio_output
-
-
-# Gradio UI with text and audio input/output
+# Gradio Interface
 def create_interface():
-
-
-
-
-
-
-
-    gr.Textbox(label="Text
-    gr.Audio(label="
-
-
-
-
-
-
+    with gr.Blocks() as demo:
+        chatbot = gr.Chatbot()
+        msg = gr.Textbox(label="Enter your message")
+        clear = gr.Button("Clear")
+
+        # Inputs and Outputs for Text and Audio
+        with gr.Row():
+            text_input = gr.Textbox(label="Text Input", placeholder="Enter your message...")
+            audio_input = gr.Audio(type="filepath", label="Audio Input (Optional)")
+
+        # Outputs for Text and Audio Response
+        with gr.Row():
+            text_output = gr.Textbox(label="Text Output")
+            audio_output = gr.Audio(label="Voice Output")
+
+        # Interaction logic
+        def user(user_message, history):
+            return "", history + [[user_message, ""]]
+
+        def bot(history):
+            user_query = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in history[:-1])
+            last_query = history[-1][0]
+            user_query += f"Student: {last_query}"
+            response = respond(user_query, history)
+            history[-1][1] = response
+            return history, response  # Return updated history and response
+
+        # Submit text input
+        msg.submit(user, [msg, chatbot], [msg, chatbot]).then(bot, [chatbot], [chatbot, text_output])
+
+        # Submit audio input
+        audio_input.change(user, [audio_input, chatbot], [audio_input, chatbot]).then(bot, [chatbot], [chatbot, text_output])
+
+        # Clear button
+        clear.click(lambda: None, None, chatbot, queue=False)
+
+    return demo
+
+
+# Launch Gradio app
 if __name__ == "__main__":
-    create_interface()
+    demo = create_interface()
+    demo.queue()
+    demo.launch(server_name="0.0.0.0", server_port=2121)  # You can change port as needed
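A few notes on the new version. First, the local model load: passing load_in_4bit=True straight to from_pretrained is deprecated in recent transformers releases in favor of a BitsAndBytesConfig, and 4-bit loading needs the bitsandbytes package plus a CUDA GPU. A minimal sketch of the same load under that API (note that respond() answers through the remote InferenceClient, so the locally loaded model is never actually used by this code path):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit quantization settings; bnb_4bit_compute_dtype sets the dtype used
# for matmuls at runtime, mirroring the torch_dtype=bfloat16 in the diff.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "eurecom-ds/Phi-3-mini-4k-socratic",
    quantization_config=quant_config,
    device_map="auto",  # place layers on the available GPU automatically
)
tokenizer = AutoTokenizer.from_pretrained("eurecom-ds/Phi-3-mini-4k-socratic")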
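Second, the streaming loop in respond() reads each chunk with dict-style access (message_response['choices'][0].get('delta', {})). Recent huggingface_hub releases yield ChatCompletionStreamOutput objects from chat_completion(..., stream=True), which are read by attribute; a sketch of that pattern follows (stream_reply is a hypothetical helper, and the placeholder token stands in for the hardcoded API_KEY, which has the shape of a Google API key rather than a Hugging Face hf_... token and should not be committed in any case):

from huggingface_hub import InferenceClient

API_KEY = "hf_..."  # placeholder; supply a real Hugging Face token
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=API_KEY)

def stream_reply(messages, max_tokens=150):
    # Accumulate the streamed delta fragments into one response string.
    response = ""
    for chunk in client.chat_completion(messages, max_tokens=max_tokens, stream=True):
        if chunk.choices:
            delta = chunk.choices[0].delta.content
            if delta:
                response += delta
    return response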
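Third, the diff only shows the tail of voice_to_text() (the Google error string and return text). For reference, the standard speech_recognition pattern that ends in exactly those lines looks roughly like this, assuming audio is the file path supplied by gr.Audio(type="filepath"):

import speech_recognition as sr

def voice_to_text(audio):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)  # read the entire file
    try:
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        text = "Google Speech Recognition could not understand the audio."
    except sr.RequestError:
        text = "Could not request results from Google Speech Recognition service."
    return text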
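Finally, the UI declares audio_output = gr.Audio(label="Voice Output"), but bot() only returns text, so text_to_voice() is never called and the voice output stays empty in this version. A hypothetical wiring inside create_interface() that fills it (only the extra text_to_voice() call and the third output are new; everything else is the diff's own logic):

def bot(history):
    user_query = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in history[:-1])
    user_query += f"Student: {history[-1][0]}"
    response = respond(user_query, history)
    history[-1][1] = response
    audio_path = text_to_voice(response)  # writes and returns 'response.mp3'
    return history, response, audio_path

msg.submit(user, [msg, chatbot], [msg, chatbot]).then(
    bot, [chatbot], [chatbot, text_output, audio_output]
)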