Spaces:

umaradnaan
/

speaklearn-ai

Sleeping

App Files Community

umaradnaan committed on Dec 6, 2025

Commit

0f08dd3

verified ·

1 Parent(s): 373b471

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -57

app.py CHANGED Viewed

@@ -3,114 +3,98 @@ import gradio as gr
 import google.generativeai as genai
 import speech_recognition as sr
 import tempfile
-import base64
-import time
 # -----------------------------
-# Gemini 1.5 Flash Setup
 # -----------------------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 model = genai.GenerativeModel("gemini-1.5-flash")
 # -----------------------------
-# Voice to Text Setup
 # -----------------------------
 recognizer = sr.Recognizer()
 def voice_to_text(audio_bytes):
-    """Convert uploaded mic audio to text."""
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
         tmp.write(audio_bytes)
-        temp_path = tmp.name
-    with sr.AudioFile(temp_path) as source:
         audio = recognizer.record(source)
         return recognizer.recognize_google(audio)
 # -----------------------------
-# Chat + Correction Loop
 # -----------------------------
-def generate_reply(user_input, history):
     if history is None:
         history = []
-    # Auto-correction logic
     corrected = model.generate_content(
-        f"Correct the user's sentence only if it's wrong. "
-        f"If correct, return same sentence.\nSentence: {user_input}"
     ).text
-    # If wrong → ask again until correct
-    if corrected.lower() != user_input.lower():
-        return (
-            history + [
-                ("User", user_input),
-                ("AI", f"❌ Incorrect. Try again.\nCorrect sentence: **{corrected}**")
-            ],
-            corrected,
-        )
-    # Generate normal chatbot response
     response = model.generate_content(
-        f"You are a friendly AI assistant. Continue conversation naturally.\nUser: {corrected}"
     ).text
     history.append(("User", corrected))
     history.append(("AI", response))
-    return history, response
 # -----------------------------
-# Gradio UI
 # -----------------------------
-css = """
-#chatbox {height: 520px !important;}
-"""
-def stream_chat(message, history):
-    """Stream response in real-time"""
-    history = history or []
     reply = model.generate_content(
-        message, stream=True
     )
-    output = ""
     for chunk in reply:
         if chunk.text:
-            output += chunk.text
-            yield output
 # -----------------------------
-# UI Layout (Layout 1)
 # -----------------------------
-with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
-    gr.HTML("<h2 style='text-align:center;'>🎤 AI Voice Chatbot (Gemini 1.5 Flash)</h2>")
-    chatbot = gr.Chatbot(
-        avatar_images=("assets/avatars/ai_avatar.png", "assets/avatars/user_avatar.png"),
-        elem_id="chatbox",
-        height=530
-    )
     with gr.Row():
-        msg = gr.Textbox(placeholder="Type or speak your message...")
-        mic = gr.Audio(source="microphone", type="bytes", label="🎤 Speak your topic")
-        send_btn = gr.Button("Send")
-    # Voice input handler
     def handle_voice(audio, history):
-        if audio is None:
-            return history
         text = voice_to_text(audio)
         history.append(("User", text))
         return history, text
-    mic.submit(handle_voice, [mic, chatbot], [chatbot, msg])
-    # Text conversation submit
-    send_btn.click(stream_chat, [msg, chatbot], chatbot)
-    msg.submit(stream_chat, [msg, chatbot], chatbot)
 app.launch()

 import google.generativeai as genai
 import speech_recognition as sr
 import tempfile
 # -----------------------------
+# Gemini Setup
 # -----------------------------
 genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 model = genai.GenerativeModel("gemini-1.5-flash")
 # -----------------------------
+# Voice to Text
 # -----------------------------
 recognizer = sr.Recognizer()
 def voice_to_text(audio_bytes):
+    if audio_bytes is None:
+        return ""
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
         tmp.write(audio_bytes)
+        path = tmp.name
+    with sr.AudioFile(path) as source:
         audio = recognizer.record(source)
         return recognizer.recognize_google(audio)
 # -----------------------------
+# AI Logic
 # -----------------------------
+def generate_reply(message, history):
     if history is None:
         history = []
+    # Sentence correction
     corrected = model.generate_content(
+        f"Fix the user's sentence ONLY if it is grammatically wrong.\nUser: {message}"
     ).text
+    if corrected.lower() != message.lower():
+        history.append(("User", message))
+        history.append(("AI", f"❌ Incorrect. Repeat correctly:\n➡️ {corrected}"))
+        return history
+    # Natural conversation
     response = model.generate_content(
+        f"Continue conversation naturally.\nUser: {corrected}"
     ).text
     history.append(("User", corrected))
     history.append(("AI", response))
+    return history
 # -----------------------------
+# Gradio Streaming
 # -----------------------------
+def stream_reply(message, history):
+    if history is None:
+        history = []
     reply = model.generate_content(
+        f"User: {message}", stream=True
     )
+    full = ""
     for chunk in reply:
         if chunk.text:
+            full += chunk.text
+            yield full
 # -----------------------------
+# UI
 # -----------------------------
+with gr.Blocks(theme=gr.themes.Soft()) as app:
+    gr.Markdown("### 🎤 AI Voice Conversation Bot (Gemini 1.5 Flash)")
+    gr.Markdown("Speak a topic → AI starts → If you pronounce wrong → AI corrects you until perfect.")
+    chatbot = gr.Chatbot(height=500, show_label=False)
     with gr.Row():
+        txt = gr.Textbox(placeholder="Type or speak...", scale=3)
+        mic = gr.Audio(type="bytes", label="🎤 Speak", scale=2)
+        send = gr.Button("Send", scale=1)
+    # Voice handler
     def handle_voice(audio, history):
         text = voice_to_text(audio)
+        if not text:
+            return history, ""
         history.append(("User", text))
         return history, text
+    mic.change(handle_voice, [mic, chatbot], [chatbot, txt])
+    # Text submit
+    send.click(generate_reply, [txt, chatbot], chatbot)
+    txt.submit(generate_reply, [txt, chatbot], chatbot)
 app.launch()