kathirog committed on
Commit
f404bdb
·
verified ·
1 Parent(s): ab37b41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -78
app.py CHANGED
@@ -4,83 +4,98 @@ from threading import Thread
4
 
5
  import gradio as gr
6
  import torch
7
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
8
- from accelerate import Accelerator # Use Accelerate for better performance
9
-
10
- # Argument parsing (optional, can be omitted if not required)
11
- parser = argparse.ArgumentParser(prog="SOCRATIC-CHATBOT", description="Socratic chatbot")
12
-
13
- parser.add_argument("--load-in-4bit",
14
- action="store_true",
15
- help="Load base model with 4bit quantization (requires GPU)")
16
-
17
- parser.add_argument("--server-port",
18
- type=int,
19
- default=2121,
20
- help="The port the chatbot server listens to")
21
-
22
- args = parser.parse_args()
23
-
24
- # Accelerator setup to manage devices efficiently (CPU/GPU)
25
- accelerator = Accelerator()
26
-
27
- with gr.Blocks() as demo:
28
- chatbot = gr.Chatbot()
29
- msg = gr.Textbox()
30
- clear = gr.Button("Clear")
31
-
32
- # Load prompt template from external file
33
- with urllib.request.urlopen(
34
- "https://raw.githubusercontent.com/GiovanniGatti/socratic-llm/kdd-2024/templates/inference.txt"
35
- ) as f:
36
- inference_prompt_template = f.read().decode('utf-8')
37
-
38
- # Detect device (GPU if available)
39
- device = accelerator.device
40
-
41
- # Load model and tokenizer with efficient memory management
42
- model = AutoModelForCausalLM.from_pretrained(
43
- "eurecom-ds/Phi-3-mini-4k-socratic",
44
- torch_dtype=torch.bfloat16 if device.type == 'cuda' else torch.float32, # Use bfloat16 on GPU
45
- load_in_4bit=args.load_in_4bit,
46
- trust_remote_code=True,
47
- device_map="auto", # Automatically distribute model to available devices
48
- ).to(device)
49
-
50
- tokenizer = AutoTokenizer.from_pretrained("eurecom-ds/Phi-3-mini-4k-socratic")
51
- streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
52
-
53
- # Function to handle user messages
54
- def user(user_message, history):
55
- return "", history + [[user_message, ""]]
56
-
57
- # Function to generate bot responses
58
- def bot(history):
59
- user_query = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in history[:-1])
60
- last_query = history[-1][0]
61
- user_query += f"Student: {last_query}"
62
- content = inference_prompt_template.format(input=user_query)
63
-
64
- formatted = tokenizer.apply_chat_template(
65
- [{"role": "user", "content": content}], tokenize=False, add_generation_prompt=True
66
  )
67
 
68
- encoded_inputs = tokenizer([formatted], return_tensors="pt").to(device)
69
-
70
- # Use threads to handle model generation asynchronously
71
- thread = Thread(target=model.generate, kwargs=dict(encoded_inputs, max_new_tokens=250, streamer=streamer))
72
- thread.start()
73
-
74
- for word in streamer:
75
- history[-1][1] += word
76
- yield history
77
-
78
- # User interaction handling
79
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, [chatbot], chatbot)
80
-
81
- # Clear chat button functionality
82
- clear.click(lambda: None, None, chatbot, queue=False)
83
-
84
- # Launch the Gradio app
85
- demo.queue()
86
- demo.launch(server_name="0.0.0.0", server_port=args.server_port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  import gradio as gr
6
  import torch
7
+ import pyttsx3
8
+ import speech_recognition as sr
9
+ from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM
10
+
11
# Convert voice input (audio) to text.
def voice_to_text(audio):
    """Transcribe a recorded audio file using Google Speech Recognition.

    Parameters
    ----------
    audio : str | file-like
        Path to the audio file. ``gr.Audio(type="filepath")`` passes a plain
        string path; a file-like object exposing ``.name`` is also accepted.

    Returns
    -------
    str
        The transcription, or a human-readable error message when the audio
        could not be understood or the recognition service was unreachable.
    """
    # BUG FIX: with gr.Audio(type="filepath") the callback receives a *string*,
    # so the original `audio.name` raised AttributeError. Accept both forms.
    path = audio if isinstance(audio, str) else audio.name
    recognizer = sr.Recognizer()
    with sr.AudioFile(path) as source:
        audio_data = recognizer.record(source)
    try:
        # Free Google Web Speech API endpoint (network call, no API key).
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        text = "Sorry, I could not understand the audio."
    except sr.RequestError:
        text = "Could not request results from Google Speech Recognition service."
    return text
23
+
24
# Convert text to speech (voice output).
def text_to_voice(text):
    """Synthesize *text* with the local TTS engine and return the audio path.

    Parameters
    ----------
    text : str
        The text to speak.

    Returns
    -------
    str
        Path to a freshly created audio file containing the synthesized speech.
    """
    # Local imports keep this helper self-contained.
    import os
    import tempfile

    engine = pyttsx3.init()
    # FIX: pyttsx3 writes platform-native audio (WAV/AIFF) regardless of the
    # file extension, so the original hard-coded 'response.mp3' was mislabeled
    # AND raced between concurrent requests. Use a unique temp file instead.
    fd, path = tempfile.mkstemp(prefix="response_", suffix=".wav")
    os.close(fd)  # pyttsx3 opens the path itself; release our handle.
    engine.save_to_file(text, path)
    engine.runAndWait()  # blocks until synthesis completes
    return path
30
+
31
# Model loading, Gradio UI wiring, and server startup.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog="SOCRATIC-CHATBOT", description="Socratic chatbot")

    parser.add_argument("--load-in-4bit",
                        action="store_true",
                        help="Load base model with 4bit quantization (requires GPU)")

    parser.add_argument("--server-port",
                        type=int,
                        default=2121,
                        help="The port the chatbot server listens to")

    args = parser.parse_args()

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        audio_input = gr.Audio(type="filepath", label="Audio Input (or leave blank to use text input)")
        # FIX: declare the voice-reply component once in the layout instead of
        # instantiating a throwaway gr.Audio() inside the outputs list below.
        audio_output = gr.Audio(label="Voice reply")
        clear = gr.Button("Clear")

        # Prompt template is fetched from the project repository at startup.
        with urllib.request.urlopen(
                "https://raw.githubusercontent.com/GiovanniGatti/socratic-llm/kdd-2024/templates/inference.txt"
        ) as f:
            inference_prompt_template = f.read().decode('utf-8')

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        model = AutoModelForCausalLM.from_pretrained(
            "eurecom-ds/Phi-3-mini-4k-socratic",
            # FIX: bfloat16 is unsupported/very slow on most CPUs — restore the
            # float32 fallback that the previous revision had.
            torch_dtype=torch.bfloat16 if device.type == 'cuda' else torch.float32,
            load_in_4bit=args.load_in_4bit,
            trust_remote_code=True,
            device_map="auto",  # let accelerate place the weights
        )

        tokenizer = AutoTokenizer.from_pretrained("eurecom-ds/Phi-3-mini-4k-socratic")

        def user(user_message, history):
            # Append the new student turn with an empty teacher slot and clear
            # the textbox.
            return "", history + [[user_message, ""]]

        def bot(history, audio=None):
            """Stream the model's reply; yields (history, audio_path_or_None)."""
            if audio:
                # Voice input: transcribe it and record the transcription as
                # the latest student turn so the UI shows what was understood
                # (the original dropped it and bypassed the transcript format).
                history[-1][0] = voice_to_text(audio)

            # Build the Student/Teacher transcript the prompt template expects.
            past_turns = "".join(f"Student: {s}\nTeacher: {t}\n" for s, t in history[:-1])
            user_query = past_turns + f"Student: {history[-1][0]}"

            content = inference_prompt_template.format(input=user_query)
            formatted = tokenizer.apply_chat_template(
                [{"role": "user", "content": content}], tokenize=False, add_generation_prompt=True
            )

            encoded_inputs = tokenizer([formatted], return_tensors="pt").to(model.device)

            # FIX: a module-level streamer shared by all requests interleaves
            # concurrent generations — create a fresh one per call.
            streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
            # Generate on a worker thread so we can stream tokens as they come.
            thread = Thread(target=model.generate,
                            kwargs=dict(encoded_inputs, max_new_tokens=250, streamer=streamer))
            thread.start()

            for word in streamer:
                history[-1][1] += word
                # FIX: the original synthesized speech for EVERY streamed token,
                # re-running TTS hundreds of times per reply. Stream text only…
                yield history, None
            # …and voice the complete reply exactly once at the end.
            yield history, text_to_voice(history[-1][1])

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            bot, [chatbot, audio_input], [chatbot, audio_output]
        )
        clear.click(lambda: None, None, chatbot, queue=False)

    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=args.server_port)