# fumblebots / app.py
# Hosting-page header: kathirog, "Update app.py", commit b56f80b (verified).
import os
import tempfile

import gradio as gr
import pyttsx3
import requests
import speech_recognition as sr
# Gemini API configuration.
# The key is read from the environment so that no secret lives in source
# control. Any key that was previously committed in this file must be treated
# as compromised and revoked.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
# The model is overridable because hosted model names are retired over time.
# NOTE(review): confirm the default below is still offered before deploying.
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
API_URL = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    f"{GEMINI_MODEL}:generateContent"
)
# Upper bound (seconds) on one API round trip so a stalled connection cannot
# hang the UI request indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def _extract_gemini_text(data):
    """Return the joined text parts of the first candidate, or a placeholder."""
    try:
        parts = data["candidates"][0]["content"]["parts"]
    except (KeyError, IndexError, TypeError):
        # Blocked or empty replies carry no candidate content.
        return "No response text"
    text = "".join(
        part.get("text", "") for part in parts if isinstance(part, dict)
    )
    return text or "No response text"


# Function to call Gemini API
def call_gemini_api(message):
    """Send *message* to the Gemini ``generateContent`` endpoint.

    Args:
        message: The user prompt to forward to the model.

    Returns:
        The generated text on success. On any failure a human-readable string
        starting with ``"Error"`` is returned instead of raising, so the
        caller can display it directly.
    """
    if not API_KEY:
        return "Error: GEMINI_API_KEY environment variable is not set."

    headers = {
        # API keys are passed in this header; "Authorization: Bearer" is
        # reserved for OAuth access tokens.
        "x-goog-api-key": API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [{"parts": [{"text": message}]}],
        "generationConfig": {"maxOutputTokens": 100},
    }

    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        return f"Error occurred while calling API: {str(e)}"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    try:
        data = response.json()
    except ValueError as e:
        # A 200 with a non-JSON body (e.g. a proxy error page).
        return f"Error occurred while calling API: {str(e)}"
    return _extract_gemini_text(data)
# Convert text to speech (TTS)
def text_to_speech(text):
    """Synthesize *text* to an audio file and return the file's path.

    Each call writes to its own temporary file. A single fixed filename would
    be shared by every concurrent request, letting one user's audio overwrite
    another's before it is served.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated audio file, or ``None`` when *text* is empty or
        synthesis fails (the failure is printed, not raised, so the text
        response can still be shown).
    """
    if not text or not str(text).strip():
        return None

    audio_filename = None
    try:
        engine = pyttsx3.init()
        # NOTE(review): the ".mp3" suffix is kept from the original code;
        # confirm the active pyttsx3 driver really writes a container that
        # matches this extension.
        handle, audio_filename = tempfile.mkstemp(
            prefix="response_", suffix=".mp3"
        )
        os.close(handle)  # pyttsx3 reopens the path itself
        engine.save_to_file(text, audio_filename)
        engine.runAndWait()
        return audio_filename
    except Exception as e:
        # Best-effort boundary: TTS is optional, so report and carry on.
        print(f"Error with TTS: {e}")
        if audio_filename is not None:
            try:
                os.remove(audio_filename)  # do not leave a partial file behind
            except OSError:
                pass
        return None
class _TranscriptionError(Exception):
    """Raised when an audio clip cannot be turned into text."""


def _transcribe(audio_path):
    """Return the transcript of *audio_path* or raise ``_TranscriptionError``."""
    if not audio_path:
        raise _TranscriptionError("Could not read audio file")

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
    except (ValueError, OSError) as e:
        # Opening fails for missing files and for unsupported formats.
        raise _TranscriptionError("Could not read audio file") from e

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError as e:
        raise _TranscriptionError("Could not understand audio") from e
    except sr.RequestError as e:
        raise _TranscriptionError(
            "Request error with the recognition service"
        ) from e


# Convert audio to text (ASR)
def audio_to_text(audio_path):
    """Transcribe the audio file at *audio_path*.

    Args:
        audio_path: Filesystem path of the recording.

    Returns:
        The recognized text, or a short failure message when the file cannot
        be read, the speech is unintelligible, or the recognition service
        cannot be reached.
    """
    try:
        return _transcribe(audio_path)
    except _TranscriptionError as e:
        return str(e)


# Define function for Gradio interface
def respond(text_input=None, audio_input=None):
    """Answer a text or audio message with text plus synthesized speech.

    Audio input, when provided, takes precedence over typed text.

    Args:
        text_input: Message typed by the user, if any.
        audio_input: Path of a recorded/uploaded audio file, if any.

    Returns:
        A ``(response_text, response_audio_path)`` tuple. The audio path is
        ``None`` when there is no usable input or speech synthesis fails.
    """
    if audio_input:
        try:
            text_input = _transcribe(audio_input)
        except _TranscriptionError as e:
            # Report the failure to the user instead of forwarding the failure
            # message to the model as though the user had said it.
            return f"Error: {e}", None

    text_input = (text_input or "").strip()
    if not text_input:
        return "Error: No input provided.", None

    # Call Gemini API with text input and get response
    api_response = call_gemini_api(text_input)
    # Convert the API response text into audio
    audio_response = text_to_speech(api_response)
    return api_response, audio_response
# Gradio Interface setup
# Two inputs (typed text and an audio clip delivered as a file path) are passed
# to `respond`; its two return values fill the text and audio outputs in order.
_input_components = [
    gr.Textbox(label="Text Input", placeholder="Enter your message..."),
    gr.Audio(type="filepath", label="Audio Input"),
]
_output_components = [
    gr.Textbox(label="Response Text"),
    gr.Audio(label="Response Audio"),
]

demo = gr.Interface(
    fn=respond,
    inputs=_input_components,
    outputs=_output_components,
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)