Spaces:

kathirog
/

fumblebots

Sleeping

App Files Files Community

fumblebots / app.py

kathirog

Update app.py

b56f80b verified 11 months ago

raw

history blame contribute delete

2.66 kB

	import os
	import tempfile

	import gradio as gr
	import pyttsx3
	import requests
	import speech_recognition as sr

	# Gemini API credentials and endpoint.
	# The key is read from the GEMINI_API_KEY environment variable (for example a
	# Space/CI secret) so that it is never committed with the source.
	# SECURITY: a literal key used to be hard-coded on this line and was published
	# with the file; treat that key as compromised and revoke it.
	API_KEY = os.environ.get("GEMINI_API_KEY", "")
	# NOTE(review): this path ("v1alpha2", "gemini2", "generateText") is kept
	# unchanged; confirm it against the current Gemini API reference.
	API_URL = "https://generativelanguage.googleapis.com/v1alpha2/models/gemini2:generateText"  # Gemini API URL

	# Function to call Gemini API
	def call_gemini_api(message):
	    """Send ``message`` to the endpoint in ``API_URL`` and return the reply text.

	    Failures are reported as a human-readable string instead of an exception,
	    because the result of this function is shown to the user as-is.

	    Args:
	        message: Prompt text to send.

	    Returns:
	        The ``generated_text`` field of the JSON reply ("No response text" when
	        that field is absent), or a string starting with ``Error`` that
	        describes the HTTP, network or decoding problem.
	    """
	    headers = {
	        # Google API keys are passed in the x-goog-api-key header; the
	        # "Authorization: Bearer" form is for OAuth access tokens only.
	        "x-goog-api-key": API_KEY,
	        "Content-Type": "application/json",
	    }
	    # NOTE(review): the payload keys and the "generated_text" response field
	    # are kept from the original code; confirm them against the API reference
	    # for the endpoint configured in API_URL.
	    payload = {
	        "prompt": message,
	        "max_output_tokens": 100,
	    }

	    try:
	        # requests applies no timeout by default, so a stalled connection
	        # would otherwise block this request handler indefinitely.
	        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
	    except requests.RequestException as e:
	        return f"Error occurred while calling API: {str(e)}"

	    if response.status_code != 200:
	        return f"Error: {response.status_code}, {response.text}"

	    try:
	        body = response.json()
	    except ValueError as e:
	        # A 200 response whose body is not valid JSON.
	        return f"Error occurred while calling API: {str(e)}"

	    if not isinstance(body, dict):
	        # Valid JSON, but not an object that could carry the reply field.
	        return "No response text"
	    return body.get("generated_text", "No response text")

	# Convert text to speech (TTS)
	def text_to_speech(text):
	    """Synthesize ``text`` into an audio file and return the file's path.

	    Every call writes to its own temporary file, so simultaneous requests
	    cannot overwrite each other's audio.

	    Args:
	        text: Text to speak.

	    Returns:
	        Path of the generated audio file, or ``None`` when ``text`` is empty
	        or synthesis fails (the failure is printed).
	    """
	    if not text:
	        # Nothing to synthesize; skip starting the speech engine.
	        return None

	    # Reserve a unique file name. Only the path is handed to the engine, so the
	    # descriptor returned by mkstemp is closed right away.
	    # NOTE(review): the ".mp3" suffix is kept from the original file name; the
	    # actual audio format is decided by the pyttsx3 driver - confirm.
	    fd, audio_filename = tempfile.mkstemp(prefix="response_", suffix=".mp3")
	    os.close(fd)

	    try:
	        engine = pyttsx3.init()
	        engine.save_to_file(text, audio_filename)
	        engine.runAndWait()
	        return audio_filename
	    except Exception as e:
	        # Best effort: the caller still returns the text reply without audio.
	        print(f"Error with TTS: {e}")
	        try:
	            # Do not leave the empty placeholder file behind.
	            os.remove(audio_filename)
	        except OSError:
	            pass
	        return None

	# Convert audio to text (ASR)
	def audio_to_text(audio_path):
	    """Transcribe the audio file at ``audio_path`` and return the text.

	    The whole file is read through ``sr.AudioFile`` and passed to the
	    recognizer's ``recognize_google`` method.

	    Args:
	        audio_path: Path of the audio file to transcribe.

	    Returns:
	        The recognized text, or a fixed explanatory message when the speech
	        could not be understood or the recognition request failed.
	    """
	    asr = sr.Recognizer()
	    with sr.AudioFile(audio_path) as clip:
	        captured = asr.record(clip)

	    try:
	        transcript = asr.recognize_google(captured)
	    except sr.UnknownValueError:
	        return "Could not understand audio"
	    except sr.RequestError:
	        return "Request error with the recognition service"
	    return transcript

	# Define function for Gradio interface
	def respond(text_input=None, audio_input=None):
	    """Answer a typed or spoken message with a text reply and spoken audio.

	    Args:
	        text_input: Message typed by the user, if any.
	        audio_input: Path of a recorded/uploaded audio file, if any. When
	            given, its transcription replaces ``text_input``.

	    Returns:
	        A ``(reply_text, reply_audio_path)`` tuple. ``reply_audio_path`` is
	        ``None`` when no usable input was provided or speech synthesis
	        produced no file.
	    """
	    if audio_input:
	        # Spoken input takes precedence over anything typed in the text box.
	        text_input = audio_to_text(audio_input)

	    # Whitespace-only text carries no message; treat it like missing input
	    # instead of sending a blank prompt to the API.
	    prompt = (text_input or "").strip()
	    if not prompt:
	        return "Error: No input provided.", None

	    api_response = call_gemini_api(prompt)
	    audio_response = text_to_speech(api_response)
	    return api_response, audio_response

	# Gradio Interface setup
	# Gradio UI wired to respond(): the two input components supply its
	# text_input and audio_input arguments (in that order), and the two output
	# components display the (text, audio) pair it returns.
	demo = gr.Interface(
	fn=respond,
	inputs=[
	gr.Textbox(label="Text Input", placeholder="Enter your message..."),
	# type="filepath": the audio is passed on as a file path, which is what
	# audio_to_text() expects.
	gr.Audio(type="filepath", label="Audio Input")
	],
	outputs=[
	gr.Textbox(label="Response Text"),
	# Fed with the audio file path returned by respond() (or None).
	gr.Audio(label="Response Audio")
	]
	)

	if __name__ == "__main__":
	    # Start the Gradio app only when this file is executed as a script, not
	    # when it is imported; debug mode is enabled for the launched server.
	    demo.launch(debug=True)