Spaces:

kathirog
/

fumblebots

Sleeping

App Files Files Community

kathirog committed on Feb 28, 2025

Commit

8a3153b

verified ·

1 Parent(s): f404bdb

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -77

app.py CHANGED Viewed

@@ -4,9 +4,42 @@ from threading import Thread
 import gradio as gr
 import torch
-import pyttsx3
-import speech_recognition as sr
 from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM
 # Convert voice input (audio) to text
 def voice_to_text(audio):
@@ -21,81 +54,62 @@ def voice_to_text(audio):
             text = "Could not request results from Google Speech Recognition service."
     return text
 # Convert text to speech (voice output)
 def text_to_voice(text):
-    engine = pyttsx3.init()
-    engine.save_to_file(text, 'response.mp3')
     engine.runAndWait()
-    return 'response.mp3'
-# Model loading and configuration
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(prog="SOCRATIC-CHATBOT", description="Socratic chatbot")
-    parser.add_argument("--load-in-4bit",
-                        action="store_true",
-                        help="Load base model with 4bit quantization (requires GPU)")
-    parser.add_argument("--server-port",
-                        type=int,
-                        default=2121,
-                        help="The port the chatbot server listens to")
-    args = parser.parse_args()
-    with gr.Blocks() as demo:
-        chatbot = gr.Chatbot()
-        msg = gr.Textbox()
-        audio_input = gr.Audio(type="filepath", label="Audio Input (or leave blank to use text input)")
-        clear = gr.Button("Clear")
-        with urllib.request.urlopen(
-                "https://raw.githubusercontent.com/GiovanniGatti/socratic-llm/kdd-2024/templates/inference.txt"
-        ) as f:
-            inference_prompt_template = f.read().decode('utf-8')
-        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        model = AutoModelForCausalLM.from_pretrained(
-            "eurecom-ds/Phi-3-mini-4k-socratic",
-            torch_dtype=torch.bfloat16,
-            load_in_4bit=args.load_in_4bit,
-            trust_remote_code=True,
-            device_map=device,
-        )
-        tokenizer = AutoTokenizer.from_pretrained("eurecom-ds/Phi-3-mini-4k-socratic")
-        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-        def user(user_message, history):
-            return "", history + [[user_message, ""]]
-        def bot(history, audio=None):
-            user_query = ""
-            if audio:
-                # Convert audio to text
-                user_query = voice_to_text(audio)
-            else:
-                user_query = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in history[:-1])
-                last_query: str = history[-1][0]
-                user_query += f"Student: {last_query}"
-            content = inference_prompt_template.format(input=user_query)
-            formatted = tokenizer.apply_chat_template(
-                [{"role": "user", "content": content}], tokenize=False, add_generation_prompt=True
-            )
-            encoded_inputs = tokenizer([formatted], return_tensors="pt").to(device)
-            thread = Thread(target=model.generate, kwargs=dict(encoded_inputs, max_new_tokens=250, streamer=streamer))
-            thread.start()
-            for word in streamer:
-                history[-1][1] += word
-                yield history, text_to_voice(history[-1][1])
-        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, [chatbot, audio_input], [chatbot, gr.Audio()])
-        clear.click(lambda: None, None, chatbot, queue=False)
-    demo.queue()
-    demo.launch(server_name="0.0.0.0", server_port=args.server_port)

 import gradio as gr
 import torch
 from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM
+import speech_recognition as sr
+import pyttsx3
+from huggingface_hub import InferenceClient
+# API key passed as the token to the Hugging Face InferenceClient below.
+# NOTE(review): this is a placeholder; keep real tokens out of source control.
+API_KEY = "YOUR_API_KEY_HERE"  # Replace with your actual API key
+# Inference client for the "HuggingFaceH4/zephyr-7b-beta" model, authenticated with API_KEY.
+# NOTE(review): `client` is not referenced by any code visible in this diff - confirm it is still needed.
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=API_KEY)
+# Module-level text-to-speech engine, shared by text_to_voice().
+engine = pyttsx3.init()
+# Load the model for Socratic chatbot
+def load_model():
+    parser = argparse.ArgumentParser(prog="SOCRATIC-CHATBOT", description="Socratic chatbot")
+    parser.add_argument("--load-in-4bit", action="store_true", help="Load base model with 4bit quantization (requires GPU)")
+    parser.add_argument("--server-port", type=int, default=2121, help="The port the chatbot server listens to")
+    args = parser.parse_args()
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    model = AutoModelForCausalLM.from_pretrained(
+        "eurecom-ds/Phi-3-mini-4k-socratic",
+        torch_dtype=torch.bfloat16,
+        load_in_4bit=args.load_in_4bit,
+        trust_remote_code=True,
+        device_map=device,
+    )
+    tokenizer = AutoTokenizer.from_pretrained("eurecom-ds/Phi-3-mini-4k-socratic")
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    return model, tokenizer, streamer, device
 # Convert voice input (audio) to text
 def voice_to_text(audio):
             text = "Could not request results from Google Speech Recognition service."
     return text
 # Convert text to speech (voice output)
 def text_to_voice(text):
+    # Hands `text` to the module-level pyttsx3 `engine` for saving to a file
+    # and returns the path of that file.
+    # NOTE(review): every call targets the same 'response.mp3', so a later
+    # call overwrites the output of an earlier one.
+    audio_file = 'response.mp3'
+    engine.save_to_file(text, audio_file)
     engine.runAndWait()
+    return audio_file
+# Respond with Socratic Chatbot logic and text-to-speech
+def respond(message, history, audio_input=None):
+    if audio_input:
+        message = voice_to_text(audio_input)  # Convert audio input to text if available
+    # Prepare the prompt for the Socratic model
+    user_query = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in history[:-1])
+    last_query: str = history[-1][0]
+    user_query += f"Student: {last_query}"
+    content = f"Teacher: {user_query}"
+    # Get the model's response
+    model, tokenizer, streamer, device = load_model()
+    formatted = tokenizer.apply_chat_template([{"role": "user", "content": content}], tokenize=False, add_generation_prompt=True)
+    encoded_inputs = tokenizer([formatted], return_tensors="pt").to(device)
+    thread = Thread(target=model.generate, kwargs=dict(encoded_inputs, max_new_tokens=250, streamer=streamer))
+    thread.start()
+    response = ""
+    for word in streamer:
+        response += word
+    # Convert response text to speech (audio output)
+    audio_output = text_to_voice(response)
+    return response, audio_output
+# Gradio UI with text and audio input/output
+def create_interface():
+    demo = gr.Interface(
+        fn=respond,
+        inputs=[
+            gr.Textbox(label="Text Input (or leave blank to use audio input)", placeholder="Enter your message here..."),
+            gr.Audio(type="filepath", label="Audio Input (or leave blank to use text input)"),
+        ],
+        outputs=[
+            gr.Textbox(label="Text Output"),
+            gr.Audio(label="Voice Output"),
+        ]
+    )
+    demo.launch()
+# Build and launch the UI only when this file is executed as a script.
+if __name__ == "__main__":
+    create_interface()