Spaces:

umaradnaan
/

speaklearn-ai

Sleeping

App Files Files Community

umaradnaan committed on Dec 6, 2025

Commit

af7fc5f

verified ·

1 Parent(s): 0f08dd3

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -60

app.py CHANGED Viewed

@@ -3,98 +3,148 @@ import gradio as gr
 import google.generativeai as genai
 import speech_recognition as sr
 import tempfile
 # -----------------------------
-# Gemini Setup
 # -----------------------------
-genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
 model = genai.GenerativeModel("gemini-1.5-flash")
 # -----------------------------
-# Voice to Text
 # -----------------------------
 recognizer = sr.Recognizer()
-def voice_to_text(audio_bytes):
-    if audio_bytes is None:
         return ""
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-        tmp.write(audio_bytes)
-        path = tmp.name
-    with sr.AudioFile(path) as source:
-        audio = recognizer.record(source)
-        return recognizer.recognize_google(audio)
 # -----------------------------
-# AI Logic
 # -----------------------------
 def generate_reply(message, history):
-    if history is None:
-        history = []
-    # Sentence correction
-    corrected = model.generate_content(
-        f"Fix the user's sentence ONLY if it is grammatically wrong.\nUser: {message}"
-    ).text
-    if corrected.lower() != message.lower():
-        history.append(("User", message))
-        history.append(("AI", f"❌ Incorrect. Repeat correctly:\n➡️ {corrected}"))
         return history
-    # Natural conversation
-    response = model.generate_content(
-        f"Continue conversation naturally.\nUser: {corrected}"
-    ).text
     history.append(("User", corrected))
     history.append(("AI", response))
     return history
 # -----------------------------
-# Gradio Streaming
 # -----------------------------
-def stream_reply(message, history):
-    if history is None:
-        history = []
-    reply = model.generate_content(
-        f"User: {message}", stream=True
-    )
-    full = ""
-    for chunk in reply:
-        if chunk.text:
-            full += chunk.text
-            yield full
 # -----------------------------
-# UI
 # -----------------------------
-with gr.Blocks(theme=gr.themes.Soft()) as app:
-    gr.Markdown("### 🎤 AI Voice Conversation Bot (Gemini 1.5 Flash)")
-    gr.Markdown("Speak a topic → AI starts → If you pronounce wrong → AI corrects you until perfect.")
-    chatbot = gr.Chatbot(height=500, show_label=False)
     with gr.Row():
-        txt = gr.Textbox(placeholder="Type or speak...", scale=3)
-        mic = gr.Audio(type="bytes", label="🎤 Speak", scale=2)
-        send = gr.Button("Send", scale=1)
-    # Voice handler
-    def handle_voice(audio, history):
-        text = voice_to_text(audio)
-        if not text:
-            return history, ""
-        history.append(("User", text))
-        return history, text
-    mic.change(handle_voice, [mic, chatbot], [chatbot, txt])
-    # Text submit
-    send.click(generate_reply, [txt, chatbot], chatbot)
-    txt.submit(generate_reply, [txt, chatbot], chatbot)
 app.launch()

 import google.generativeai as genai
 import speech_recognition as sr
 import tempfile
+import time
 # -----------------------------
+# CONFIG
 # -----------------------------
# The Gemini key is read from the environment; on a Hugging Face Space it is
# supplied as a secret (Settings → Variables and secrets).
API_KEY = os.environ.get("GEMINI_API_KEY")
if API_KEY is None or API_KEY == "":
    # Fail fast at start-up rather than on the first chat request.
    raise RuntimeError("Please set the GEMINI_API_KEY environment variable in Space settings.")

genai.configure(api_key=API_KEY)
# One shared model handle; generate_reply() uses it for both of its prompts.
model = genai.GenerativeModel("gemini-1.5-flash")

# -----------------------------
# Speech recognition helper
# -----------------------------
# A single module-level recognizer is reused by every transcription call.
recognizer = sr.Recognizer()
def audiofile_to_text(audio_filepath):
    """Return the transcript of the audio file at *audio_filepath*.

    The file is opened with ``sr.AudioFile``, read in full and sent to
    ``recognizer.recognize_google``.  A falsy path yields ``""`` without
    touching the recognizer.  Any failure (unreadable file, unintelligible
    speech, network error, ...) is printed to the console and also yields
    ``""``, so callers only ever need to check for an empty string.

    NOTE(review): SpeechRecognition documents ``AudioFile`` for WAV/AIFF/FLAC
    input; compressed formats such as m4a/mp3 presumably end up in the
    failure path -- confirm before advertising them to users.
    """
    if audio_filepath:
        try:
            with sr.AudioFile(audio_filepath) as clip:
                recording = recognizer.record(clip)
            return recognizer.recognize_google(recording)
        except Exception as e:
            # Best-effort helper: log the reason and fall through to "".
            print("Transcription error:", e)
    return ""
 # -----------------------------
+# AI logic: correction + conversation
 # -----------------------------
 def generate_reply(message, history):
+    """
+    Main conversation step:
+    - If user's sentence is incorrect, generator will return a corrected sentence.
+    - If corrected != user sentence -> prompt user to repeat corrected sentence.
+    - Otherwise continue the conversation normally.
+    """
+    history = history or []
+    # sanitize
+    user_msg = (message or "").strip()
+    if user_msg == "":
+        history.append(("AI", "❌ I didn't receive a message. Please type or speak something."))
+        return history
+    # Ask Gemini to correct the user's sentence (if wrong)
+    try:
+        correction_prompt = (
+            "You are a pronunciation and grammar tutor. "
+            "Given the user's sentence, if it contains grammatical or word errors, "
+            "output the corrected sentence only. If it's already correct, output the same sentence.\n\n"
+            f"User sentence: \"{user_msg}\""
+        )
+        corrected = model.generate_content(correction_prompt).text.strip()
+    except Exception as e:
+        print("Error calling Gemini (correction):", e)
+        history.append(("AI", "⚠️ Error contacting language model (correction). Try again later."))
+        return history
+    # If model gives a different sentence -> ask user to repeat
+    if corrected.lower() != user_msg.lower():
+        history.append(("User", user_msg))
+        history.append(("AI", f"❌ Incorrect. Please repeat this sentence exactly:\n➡ {corrected}"))
+        # keep expected sentence implicit — the next user reply should match corrected
         return history
+    # Otherwise, continue the conversation
+    try:
+        convo_prompt = (
+            "You are a friendly conversational tutor. Continue the conversation naturally, "
+            "ask a short follow-up question or make a short comment relevant to the user's message.\n\n"
+            f"User: \"{corrected}\""
+        )
+        response = model.generate_content(convo_prompt).text.strip()
+    except Exception as e:
+        print("Error calling Gemini (conversation):", e)
+        history.append(("AI", "⚠️ Error contacting language model (conversation). Try again later."))
+        return history
     history.append(("User", corrected))
     history.append(("AI", response))
     return history
 # -----------------------------
+# Transcribe audio button handler
 # -----------------------------
def transcribe_and_show(audio_filepath, history):
    """Handle the "Transcribe Audio" button.

    Transcribes the audio file with ``audiofile_to_text`` and reports the
    outcome in the chat.  The transcript is shown as a bot-side notice rather
    than as a user message, because it has not been sent yet: it is returned
    for the text box so the user can edit it and submit it as a normal turn.

    Args:
        audio_filepath: Path of the recorded/uploaded audio file, or ``None``.
        history: Existing chat history as a list of
            ``(user_message, bot_message)`` pairs, or ``None``.

    Returns:
        A ``(history, text)`` tuple: a new history list (the list passed in
        is not modified) and the transcript for the text box, which is ``""``
        when transcription failed.
    """
    # Copy so the caller's list is never mutated.
    history = list(history or [])

    text = audiofile_to_text(audio_filepath)
    if not text:
        history.append(
            (None, "❌ Could not transcribe audio. Please try again or use a clearer recording.")
        )
        return history, ""

    # Chat entries are (user_message, bot_message) pairs; None leaves the
    # user side empty so the label is not rendered as a user message.
    history.append((None, f"User (transcribed): {text}"))
    return history, text
+# -----------------------------
+# Reset conversation
+# -----------------------------
def reset_chat():
    """Return a fresh, empty chat history (used to clear the conversation)."""
    return list()
 # -----------------------------
+# Gradio UI
 # -----------------------------
# Introductory text rendered at the top of the page.
title_md = """
# 🎤 AI Voice Conversation Tutor
Speak a topic (or type it). The AI will start the conversation.
If you speak/sentence incorrectly, the AI will show the corrected sentence and ask you to repeat it until correct.
"""

with gr.Blocks() as app:
    gr.Markdown(title_md)
    chatbot = gr.Chatbot(value=[], label="Conversation", elem_id="chatbox")
    with gr.Row():
        # Left column: typed input and conversation controls.
        with gr.Column(scale=3):
            txt = gr.Textbox(placeholder="Type your message or use the microphone and Transcribe...", label="Message")
            send = gr.Button("Send")
            reset = gr.Button("Reset Conversation")
        # Right column: audio input and its transcription trigger.
        with gr.Column(scale=2):
            # The label lists the formats sr.AudioFile is documented to read;
            # m4a/mp3 would only ever produce the "could not transcribe" notice.
            # NOTE(review): source="upload" is the older Gradio keyword and
            # offers upload only; microphone recording needs
            # source="microphone" there, or sources=["microphone", "upload"]
            # on newer Gradio releases -- confirm the Space's pinned version.
            audio = gr.Audio(source="upload", type="filepath", label="Record or upload audio (wav/aiff/flac)")
            transcribe = gr.Button("Transcribe Audio")
    # Hook up events
    transcribe.click(transcribe_and_show, inputs=[audio, chatbot], outputs=[chatbot, txt])
    send.click(generate_reply, inputs=[txt, chatbot], outputs=chatbot)
    txt.submit(generate_reply, inputs=[txt, chatbot], outputs=chatbot)
    # Use the module's reset helper instead of a duplicate inline lambda.
    reset.click(reset_chat, outputs=chatbot)
    gr.Markdown("**How to use:**\n\n1. Speak a topic using the audio control and click **Transcribe Audio** (or type the topic in the box).  \n2. The AI will start the conversation.  \n3. If you pronounce incorrectly, AI will show the corrected sentence — repeat it (record & transcribe or type) until correct.  \n")

# Start the server only when run as a script, so importing this module
# (e.g. from a test) does not block on launch().
if __name__ == "__main__":
    app.launch()