Spaces:

umaradnaan
/

speaklearn-ai

Sleeping

File size: 3,001 Bytes

8aa2d68
86e55bc
 
8aa2d68
540430a
 
d4874be
 
 
540430a
86e55bc
8aa2d68
d4874be
540430a
d4874be
86e55bc
 
540430a
af7fc5f
540430a
af7fc5f
d4874be
af7fc5f
d4874be
8aa2d68
d4874be
 
 
540430a
 
 
 
 
d4874be
540430a
 
 
 
 
d4874be
540430a
 
d4874be
 
 
 
 
 
540430a
d4874be
 
540430a
af7fc5f
8aa2d68
d4874be
 
 
 
540430a
 
 
 
d4874be
 
 
 
540430a
 
0f08dd3
 
540430a
 
 
d4874be
af7fc5f
8aa2d68
540430a
 
 
d4874be
 
 
 
540430a
 
8aa2d68
d4874be
86e55bc
8aa2d68
540430a
 
 
d4874be
540430a
d4874be
540430a
 
 
d4874be
 
540430a

import os
import gradio as gr
import google.generativeai as genai
import speech_recognition as sr
import tempfile

# -----------------------------------------
# GEMINI CONFIGURATION
# -----------------------------------------
# The API key is read from the environment so it never lives in the source;
# if GEMINI_API_KEY is unset, None is passed through to genai.configure.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# Single shared model handle used by every chat request.
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

# -----------------------------------------
# SPEECH RECOGNITION
# -----------------------------------------
# One recognizer instance is reused for all transcriptions.
recognizer = sr.Recognizer()

def voice_to_text(audio_path):
    """Transcribe the audio file at *audio_path* into text.

    The file is read with ``sr.AudioFile`` and passed to
    ``recognizer.recognize_google``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcript on success. On any failure, a message that starts with
        ``"Error converting voice: "`` is returned instead of raising, so the
        caller can display it in the chat.
    """
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        # Raised when the speech cannot be understood; the exception usually
        # carries no message, which used to leave the user with a bare
        # "Error converting voice: " and no explanation.
        return "Error converting voice: could not understand the audio"
    except sr.RequestError as e:
        # The recognition service could not be reached or rejected the call.
        return f"Error converting voice: speech service request failed ({e})"
    except Exception as e:
        # Deliberate best-effort boundary: unreadable or unsupported files and
        # anything else unexpected are reported as text, not raised.
        return f"Error converting voice: {str(e)}"

# -----------------------------------------
# CHAT RESPONSE FUNCTION
# -----------------------------------------
def chat_response(message, history):
    """Check the user's sentence and, if it passes, answer it with Gemini.

    The message is first sent to the model with a correction prompt. If the
    returned correction differs from the message (case-insensitively), a
    "try again" reply showing the corrected form is appended to the history.
    Otherwise the full conversation is sent to the model and its reply is
    appended.

    Args:
        message: The text typed (or transcribed) by the user.
        history: List of ``(user_message, bot_message)`` pairs shown in the
            chatbot, or ``None`` for a fresh conversation.

    Returns:
        The history list with one new ``(message, reply)`` pair appended, or
        the history unchanged when *message* is empty or whitespace-only.
        The list passed in is mutated in place.
    """
    if history is None:
        history = []

    # A blank submission has nothing to correct or answer; without this guard
    # it would be sent to the model and reported as an "incorrect sentence".
    if not message or not message.strip():
        return history

    # Convert history -> Gemini contents. The SDK expects each turn as
    # {"role": ..., "parts": [...]} with the roles "user" and "model";
    # "content" / "assistant" keys are not recognised.
    gemini_messages = []
    for user_msg, bot_msg in history:
        gemini_messages.append({"role": "user", "parts": [user_msg]})
        gemini_messages.append({"role": "model", "parts": [bot_msg]})

    # Add the new user message
    gemini_messages.append({"role": "user", "parts": [message]})

    # ---------------------
    # AUTO-CORRECTION STEP
    # ---------------------
    correction = model.generate_content(
        f"Correct this sentence only if needed:\n{message}"
    ).text.strip()

    # Compare against the stripped message so stray surrounding whitespace is
    # not treated as a mistake.
    # NOTE(review): this still relies on the model echoing the sentence
    # verbatim when it is already correct - confirm the prompt achieves that.
    if correction.lower() != message.strip().lower():
        bot_reply = f"❌ Incorrect sentence.\nCorrect form → **{correction}**\nTry again!"
        history.append((message, bot_reply))
        return history

    # ---------------------
    # NORMAL RESPONSE
    # ---------------------
    response = model.generate_content(gemini_messages).text

    history.append((message, response))
    return history

# -----------------------------------------
# HANDLE VOICE INPUT
# -----------------------------------------
def handle_voice(audio_file, history):
    """Transcribe a recorded or uploaded clip and feed it to the chat.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            there is no audio (nothing is done in that case).
        history: List of ``(user_message, bot_message)`` pairs shown in the
            chatbot, or ``None``.

    Returns:
        The updated history. When there is no audio, *history* is returned
        untouched. When transcription fails, a ``("Voice Input", <error>)``
        pair is appended. Otherwise the result of ``chat_response`` for the
        transcript is returned.
    """
    if audio_file is None:
        return history

    # Mirror chat_response's guard so the failure branch below can append
    # even when no history exists yet.
    if history is None:
        history = []

    text = voice_to_text(audio_file)

    # voice_to_text reports failures as text with this fixed prefix. Matching
    # the prefix (rather than searching for the word "error" anywhere) keeps a
    # legitimate transcript such as "I made an error" from being treated as a
    # failed transcription.
    if text.startswith("Error converting voice:"):
        history.append(("Voice Input", text))
        return history

    return chat_response(text, history)


# -----------------------------------------
# GRADIO UI (OLD VERSION COMPATIBLE)
# -----------------------------------------
with gr.Blocks() as app:

    # Page title.
    gr.HTML("<h2 style='text-align:center;'>🎤 AI Voice Chatbot (Gemini 1.5 Flash)</h2>")

    # Conversation display; it is also passed to the handlers as the history.
    chatbot = gr.Chatbot(height=450)

    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message...")
        send = gr.Button("Send")

    audio = gr.Audio(type="filepath", label="🎤 Record/Upload your voice")

    # Typed input: clicking the button and submitting the textbox both run
    # chat_response on the current text and history.
    for trigger in (send.click, msg.submit):
        trigger(fn=chat_response, inputs=[msg, chatbot], outputs=chatbot)

    # Voice input: a change of the audio component runs handle_voice.
    audio.change(fn=handle_voice, inputs=[audio, chatbot], outputs=chatbot)

app.launch()