Update app.py
app.py CHANGED
Old side of the diff (removed lines are prefixed with "-", unchanged context lines with a space):

@@ -1,7 +1,7 @@
 import gradio as gr
 from openai import OpenAI
 import speech_recognition as sr
-import

 system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
 - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience

@@ -11,64 +11,165 @@ system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When r
 - A Computer Science graduate from Neil Gogte Institute of Technology
 Answer questions about your background professionally but engagingly (2-3 sentences max)."""

-
-
-
-
-        tmp_file.flush()
-        with sr.AudioFile(tmp_file.name) as source:
-            audio_data = recognizer.record(source)
-        try:
-            text = recognizer.recognize_google(audio_data)
-            return text
-        except sr.UnknownValueError:
-            return "❌ Could not understand the audio"
-        except sr.RequestError as e:
-            return f"❌ Speech recognition error: {e}"
-
-def chat_with_openai(user_input, history, api_key):
     if not api_key:
         raise gr.Error("❌ Please enter your OpenAI API key.")
     try:
         client = OpenAI(api_key=api_key)
         messages = [{"role": "system", "content": system_prompt}]
         for entry in history:
-
-
-
         response = client.chat.completions.create(
             model="gpt-4o",
             messages=messages,
             temperature=0.7
         )
         bot_reply = response.choices[0].message.content
         history.append((user_input, bot_reply))
-
     except Exception as e:
-

 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
-    gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant

-
-
-

-
-
-

-
-
-
-
-
-
-        [transcribed_text, state, api_key],
-        [chatbot, state, transcribed_text]
     )

     clear_btn = gr.Button("🗑️ Clear Chat")
     clear_btn.click(lambda: ([], []), None, [chatbot, state])

-demo.launch()
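The removed hunk is only partially legible, but the surviving fragments (tmp_file.flush(), sr.AudioFile(tmp_file.name), recognizer.record, recognize_google, and a separate chat_with_openai function) suggest the old file split transcription into its own helper that read the microphone capture from a temporary WAV file. A hedged reconstruction of that removed helper's shape, purely for orientation (transcribe_audio, audio_bytes, and the tempfile handling are assumptions; only the inner lines appear in the diff above):

import tempfile
import speech_recognition as sr

recognizer = sr.Recognizer()

def transcribe_audio(audio_bytes):
    # Write the raw recording to a temporary WAV file so SpeechRecognition can open it.
    with tempfile.NamedTemporaryFile(suffix=".wav") as tmp_file:
        tmp_file.write(audio_bytes)
        tmp_file.flush()
        with sr.AudioFile(tmp_file.name) as source:
            audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data)
            return text
        except sr.UnknownValueError:
            return "❌ Could not understand the audio"
        except sr.RequestError as e:
            return f"❌ Speech recognition error: {e}"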
New side of the diff (added lines are prefixed with "+", unchanged context lines with a space):

 import gradio as gr
 from openai import OpenAI
 import speech_recognition as sr
+import os

 system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
 - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience

 - A Computer Science graduate from Neil Gogte Institute of Technology
 Answer questions about your background professionally but engagingly (2-3 sentences max)."""

+# Initialize the SpeechRecognition Recognizer
+r = sr.Recognizer()
+
+def transcribe_audio_and_chat(audio_filepath, history, api_key):
     if not api_key:
+        # Raise a Gradio error to be displayed in the UI
         raise gr.Error("❌ Please enter your OpenAI API key.")
+
+    if audio_filepath is None:
+        # Raise a Gradio error if no audio is captured
+        raise gr.Error("No audio received. Please speak into the microphone.")
+
     try:
+        # Load the audio file
+        with sr.AudioFile(audio_filepath) as source:
+            audio_data = r.record(source)  # read the entire audio file
+
+        # Perform speech recognition
+        try:
+            user_input = r.recognize_google(audio_data)  # Using Google Web Speech API
+            print(f"Transcribed User Input: {user_input}")  # For debugging purposes
+            # If transcription is successful, you might want to show it in the chat
+            # before the AI responds. For now, we'll just use it directly.
+
+        except sr.UnknownValueError:
+            # If speech is unintelligible
+            # Return current history and an error message for the chatbot
+            return history + [("", "Sorry, I could not understand the audio. Please try again.")], ""
+        except sr.RequestError as e:
+            # If API request fails
+            # Return current history and an error message for the chatbot
+            return history + [("", f"Could not request results from Google Speech Recognition service; {e}")], ""
+
+        finally:
+            # Always clean up the temporary audio file, regardless of success or failure
+            if os.path.exists(audio_filepath):
+                os.remove(audio_filepath)
+
+        # --- Proceed with OpenAI chat ---
         client = OpenAI(api_key=api_key)
+
+        # Build messages from history
         messages = [{"role": "system", "content": system_prompt}]
         for entry in history:
+            # Ensure history entries are tuples (user_message, bot_message)
+            if isinstance(entry, (list, tuple)) and len(entry) == 2:
+                messages.append({"role": "user", "content": entry[0]})
+                messages.append({"role": "assistant", "content": entry[1]})
+        messages.append({"role": "user", "content": user_input})  # Add the current user input
+
+        # Get response from OpenAI
         response = client.chat.completions.create(
             model="gpt-4o",
             messages=messages,
             temperature=0.7
         )
+
         bot_reply = response.choices[0].message.content
+
+        # Append the new user input and bot reply to the history
         history.append((user_input, bot_reply))
+
+        # Return the updated history for the chatbot component
+        # and an empty string for the audio input, effectively clearing it for next input.
+        return history, None  # Use None for the audio input to reset the component
+
     except Exception as e:
+        # Catch any other unexpected errors
+        print(f"An unexpected error occurred: {e}")  # Log the error
+        # Raise a Gradio error for display in the UI
+        raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")

+# --- Gradio UI setup remains mostly the same ---
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
+    gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")

+    # Add custom CSS
+    gr.HTML("""
+    <style>
+        #chatBox {
+            height: 60vh;
+            overflow-y: auto;
+            padding: 20px;
+            border-radius: 10px;
+            background: #f9f9f9;
+            margin-bottom: 20px;
+        }
+        .message {
+            margin: 10px 0;
+            padding: 12px;
+            border-radius: 8px;
+        }
+        .user {
+            background: #e3f2fd;
+            text-align: right;
+        }
+        .bot {
+            background: #f5f5f5;
+        }
+        #audioInputComponent {  /* New ID for the audio component */
+            margin-top: 20px;
+        }
+        .key-status {
+            padding: 5px;
+            margin-top: 5px;
+            border-radius: 4px;
+        }
+        .success {
+            background: #d4edda;
+            color: #155724;
+        }
+        .error {
+            background: #f8d7da;
+            color: #721c24;
+        }
+    </style>
+    """)

+    api_key = gr.Textbox(label="🔑 OpenAI API Key", type="password", elem_id="apiKeyInput")
+    key_status = gr.HTML("<div id='keyStatus'></div>")
+    chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
+    state = gr.State([])  # Stores the chat history

+    audio_input = gr.Audio(
+        sources=["microphone"],
+        type="filepath",
+        label="Speak your message here",
+        elem_id="audioInputComponent",
+        streaming=False
     )

     clear_btn = gr.Button("🗑️ Clear Chat")
+
+    # Event handler: When audio input is recorded and submitted (by stopping recording)
+    audio_input.change(
+        transcribe_audio_and_chat,
+        inputs=[audio_input, state, api_key],
+        outputs=[chatbot, state]  # Ensure chatbot and state are updated
+    )
+
+    # JavaScript for API key status (still useful for UX)
+    gr.HTML("""
+    <script>
+        document.getElementById("apiKeyInput").addEventListener("input", function() {
+            const apiKey = this.value.trim();
+            const keyStatus = document.getElementById("keyStatus");
+
+            if (apiKey) {
+                keyStatus.innerHTML = '<div class="key-status success">API Key saved successfully!</div>';
+            } else {
+                keyStatus.innerHTML = '<div class="key-status error">Please enter a valid API key</div>';
+            }
+        });
+
+        // Focus on API key input on load
+        document.querySelector("#apiKeyInput input").focus();
+    </script>
+    """)
+
     clear_btn.click(lambda: ([], []), None, [chatbot, state])

+demo.launch()
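One wiring detail in the new file is worth flagging. transcribe_audio_and_chat returns two values, the tuple-style history plus None meant to reset the microphone, while audio_input.change declares outputs=[chatbot, state]; wired that way, the None ends up in state, and the list of (user, bot) tuples also does not match gr.Chatbot(type="messages"), which expects role/content dictionaries. A minimal sketch of one way to line these up, assuming the intent is to keep a tuple-based history in state, feed the chatbot messages-format data, and clear the microphone after each turn (handle_turn and history_to_messages are illustrative names, not part of the commit):

import gradio as gr

def history_to_messages(history):
    # Convert [(user, bot), ...] tuples into the role/content dicts
    # that gr.Chatbot(type="messages") expects.
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    return messages

def handle_turn(audio_filepath, history):
    # Stand-in for the real transcription + OpenAI call in app.py.
    if audio_filepath is None:
        # The change event also fires when the component is cleared; do nothing then.
        return history_to_messages(history), history, None
    user_input = f"(transcribed text from {audio_filepath})"
    bot_reply = "(model reply)"
    history.append((user_input, bot_reply))
    # One return value per output component, in order: the chatbot gets
    # messages-format data, state keeps the tuple history, and None
    # clears the microphone for the next turn.
    return history_to_messages(history), history, None

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    state = gr.State([])
    audio_input = gr.Audio(sources=["microphone"], type="filepath")
    audio_input.change(handle_turn, [audio_input, state], [chatbot, state, audio_input])

demo.launch()

If that pattern is adopted, the early returns in the speech-recognition error branches would need the same three-value shape. The Space is also assumed to list gradio, openai, and SpeechRecognition in its requirements.txt; that file is not part of this commit.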