# speaklearn-ai / app.py
# Hosting-page residue kept for provenance: avatar of user "umaradnaan";
# last commit "Update app.py" (d4874be, verified).
import os
import gradio as gr
import google.generativeai as genai
import speech_recognition as sr
import tempfile
# -----------------------------------------
# GEMINI CONFIGURATION
# -----------------------------------------
# The API key is read from the environment; if GEMINI_API_KEY is unset the
# lookup yields None and that value is passed to configure() as-is.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# Single shared model handle used by chat_response() for every request.
model = genai.GenerativeModel("gemini-1.5-flash")

# -----------------------------------------
# SPEECH RECOGNITION
# -----------------------------------------
# One Recognizer instance is created at import time and reused by
# voice_to_text() for every uploaded or recorded clip.
recognizer = sr.Recognizer()
def voice_to_text(audio_path):
    """Transcribe the audio file at *audio_path* to text.

    The file is opened with ``sr.AudioFile``, read in full through the shared
    module-level ``recognizer``, and passed to ``recognize_google``.

    Returns:
        The transcript on success. On any exception, a string of the form
        ``"Error converting voice: <exception text>"`` is returned instead of
        raising, so callers must inspect the result to detect failure.
    """
    try:
        with sr.AudioFile(audio_path) as clip:
            recording = recognizer.record(clip)
        transcript = recognizer.recognize_google(recording)
    except Exception as exc:
        # Failures are reported in-band as text rather than propagated.
        return f"Error converting voice: {exc!s}"
    return transcript
# -----------------------------------------
# CHAT RESPONSE FUNCTION
# -----------------------------------------
def _to_gemini_contents(history, message):
    """Build the multi-turn ``contents`` list passed to ``generate_content``."""
    contents = []
    for user_msg, bot_msg in history:
        # Skip incomplete turns: a None/empty side cannot be turned into a
        # text part, and dropping the whole pair keeps user/model alternating.
        if not user_msg or not bot_msg:
            continue
        # google-generativeai content dicts carry text under "parts" and use
        # the roles "user" and "model" (not "content" / "assistant").
        contents.append({"role": "user", "parts": [user_msg]})
        contents.append({"role": "model", "parts": [bot_msg]})
    contents.append({"role": "user", "parts": [message]})
    return contents


def chat_response(message, history):
    """Handle one chat turn and return the updated history.

    The message is first sent to the model with a "correct this sentence"
    prompt. If the model's answer differs from the message (ignoring case and
    surrounding whitespace), a correction notice is appended to the history.
    Otherwise the whole conversation is sent to the model and its reply is
    appended.

    Args:
        message: The user's text. ``None``, empty or whitespace-only input is
            ignored and the history is returned unchanged.
        history: List of ``(user_message, bot_message)`` pairs, or ``None``
            for a fresh conversation. The list is appended to in place.

    Returns:
        The history list including the new ``(message, reply)`` pair.
    """
    if history is None:
        history = []

    # Nothing to correct or answer; avoid sending an empty prompt to the API.
    if message is None or not message.strip():
        return history

    # ---------------------
    # AUTO-CORRECTION STEP
    # ---------------------
    correction_prompt = f"Correct this sentence only if needed:\n{message}"
    correction = model.generate_content(
        [{"role": "user", "parts": [correction_prompt]}]
    ).text.strip()

    # Compare on stripped, case-folded text so that trailing whitespace in the
    # textbox does not make a correct sentence look incorrect.
    if correction.casefold() != message.strip().casefold():
        # \u274c is the cross-mark emoji and \u2192 a right arrow; escapes
        # keep the source ASCII-only so the glyphs survive re-encoding.
        bot_reply = (
            "\u274c Incorrect sentence.\n"
            f"Correct form \u2192 **{correction}**\n"
            "Try again!"
        )
        history.append((message, bot_reply))
        return history

    # ---------------------
    # NORMAL RESPONSE
    # ---------------------
    response = model.generate_content(_to_gemini_contents(history, message)).text
    history.append((message, response))
    return history
# -----------------------------------------
# HANDLE VOICE INPUT
# -----------------------------------------
def handle_voice(audio_file, history):
    """Transcribe a recorded/uploaded clip and route it through the chat.

    Args:
        audio_file: Path to the audio file, or ``None`` when no clip is
            present (in which case nothing happens).
        history: List of ``(user_message, bot_message)`` pairs, or ``None``.

    Returns:
        The history unchanged when there is no audio; the history with a
        ``("Voice Input", <error text>)`` entry when transcription failed;
        otherwise whatever ``chat_response`` returns for the transcript.
    """
    if audio_file is None:
        return history

    text = voice_to_text(audio_file)

    # voice_to_text reports failure in-band with this exact prefix. Matching
    # the prefix (rather than looking for the word "error" anywhere) keeps a
    # spoken sentence that merely contains "error" from being misclassified.
    if text.startswith("Error converting voice:"):
        if history is None:
            history = []
        history.append(("Voice Input", text))
        return history

    return chat_response(text, history)
# -----------------------------------------
# GRADIO UI (OLD VERSION COMPATIBLE)
# -----------------------------------------
# NOTE(review): the original indentation was lost; the nesting below (textbox
# and button sharing one row, audio input beneath it) is the conventional
# reading -- confirm against the deployed layout.
with gr.Blocks() as app:
    # \U0001F3A4 is the microphone emoji; written as an escape so the glyph
    # survives re-encoding of this file.
    gr.HTML(
        "<h2 style='text-align:center;'>"
        "\U0001F3A4 AI Voice Chatbot (Gemini 1.5 Flash)"
        "</h2>"
    )
    chatbot = gr.Chatbot(height=450)

    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message...")
        send = gr.Button("Send")

    audio = gr.Audio(type="filepath", label="\U0001F3A4 Record/Upload your voice")

    # Text input: the button and the Enter key both run chat_response with the
    # textbox value and the current chat history, writing the result back to
    # the chatbot component.
    send.click(chat_response, [msg, chatbot], chatbot)
    msg.submit(chat_response, [msg, chatbot], chatbot)

    # Voice input: any change of the audio component runs handle_voice, which
    # itself ignores a None value.
    audio.change(handle_voice, [audio, chatbot], chatbot)

app.launch()