Spaces:
Sleeping
Sleeping
File size: 2,662 Bytes
1fa3e4c 6de38d8 3a0ec3f 5650516 b73a5f7 bc8f67e 9a917c3 b56f80b d9e3c0a bc8f67e 3a0ec3f 9a917c3 bc8f67e 5650516 bc8f67e 9a917c3 bc8f67e f404bdb bc8f67e b73a5f7 bc8f67e b73a5f7 bc8f67e b73a5f7 bc8f67e b73a5f7 bc8f67e 9a917c3 bc8f67e 9a917c3 bc8f67e 8a3153b bc8f67e 3a0ec3f b73a5f7 5650516 3a0ec3f bc8f67e c20bd0b 3a0ec3f 8a3153b 9a917c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import os
import tempfile

import gradio as gr
import pyttsx3
import requests
import speech_recognition as sr
# Gemini API credentials and endpoint.
# The key is read from the environment so that no secret lives in the source
# tree: set GEMINI_API_KEY before launching the app. When the variable is
# unset the key is an empty string and API calls are expected to be rejected.
# Any key that was previously committed in this file should be treated as
# leaked and rotated.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
# NOTE(review): confirm the API version and model name in this URL against the
# current Gemini API reference before relying on it.
API_URL = "https://generativelanguage.googleapis.com/v1alpha2/models/gemini2:generateText"  # Gemini API URL
# Function to call Gemini API
def call_gemini_api(message, *, timeout=30):
    """Send *message* to the configured generation endpoint and return the reply.

    Posts a JSON body containing the prompt and a 100-token output cap to
    ``API_URL``, passing ``API_KEY`` as a bearer token.

    Args:
        message: Prompt text to send.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely, which would hang the
            web handler that calls this function.

    Returns:
        The ``"generated_text"`` field of the JSON response, the string
        ``"No response text"`` when that field is missing, or a
        human-readable ``"Error..."`` string when the request fails. Network
        and HTTP failures are reported through the return value rather than
        raised.
    """
    # NOTE(review): the auth scheme, the "prompt"/"max_output_tokens" payload
    # and the "generated_text" response field are kept as originally written;
    # verify them against the Gemini API reference.
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "prompt": message,
        "max_output_tokens": 100,
    }

    # Only the network call lives in the try body so that unrelated
    # programming errors are not silently converted into an error string.
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
    except requests.RequestException as e:
        return f"Error occurred while calling API: {str(e)}"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    # A 200 response is not guaranteed to carry valid JSON.
    try:
        data = response.json()
    except ValueError as e:
        return f"Error occurred while calling API: {str(e)}"

    # Guard against a JSON body that is not an object (e.g. a list).
    if not isinstance(data, dict):
        return "No response text"
    return data.get("generated_text", "No response text")
# Convert text to speech (TTS)
def text_to_speech(text):
    """Synthesize *text* to an audio file and return the file's path.

    Each call writes to its own freshly created temporary file. A single
    fixed file name would be shared by every request, so simultaneous users
    could overwrite or receive each other's audio.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated audio file, or ``None`` when *text* is empty or
        whitespace-only, or when synthesis fails. The file is not deleted
        here; the caller is responsible for cleaning it up.
    """
    # Nothing to speak: return None instead of a path to a file that was
    # never (re)written.
    if not text or not str(text).strip():
        return None

    audio_filename = None
    try:
        # mkstemp gives a unique path; close the descriptor right away because
        # the TTS engine opens the file by name. The ".mp3" suffix matches the
        # file name the original code used.
        handle, audio_filename = tempfile.mkstemp(
            prefix="response_", suffix=".mp3"
        )
        os.close(handle)

        engine = pyttsx3.init()
        engine.save_to_file(text, audio_filename)
        engine.runAndWait()
        return audio_filename
    except Exception as e:
        # Deliberate best-effort boundary: speech output is optional, so any
        # engine/driver failure is logged and reported as "no audio".
        print(f"Error with TTS: {e}")
        if audio_filename is not None and os.path.exists(audio_filename):
            os.remove(audio_filename)
        return None
# Convert audio to text (ASR)
def audio_to_text(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    The whole file is read with ``speech_recognition`` and passed to
    ``Recognizer.recognize_google``.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The recognized text on success. On failure a human-readable message
        is returned instead of raising:
        ``"Could not read audio file"`` when the file is missing or cannot be
        opened as audio, ``"Could not understand audio"`` when no speech is
        recognized, and ``"Request error with the recognition service"`` when
        the recognition request fails.
    """
    recognizer = sr.Recognizer()

    # Opening the file can fail (missing path, unsupported format); report it
    # the same way as the recognition errors below instead of crashing.
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
    except (ValueError, OSError):
        return "Could not read audio file"

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Request error with the recognition service"
# Define function for Gradio interface
def respond(text_input=None, audio_input=None):
    """Answer a typed or spoken message with text and synthesized speech.

    Args:
        text_input: Message typed by the user, if any.
        audio_input: Path to a recorded/uploaded audio file, if any. When
            given, its transcription replaces *text_input*.

    Returns:
        A ``(response_text, response_audio_path)`` tuple. When there is no
        usable input the tuple is ``("Error: No input provided.", None)``.
        ``response_audio_path`` is whatever ``text_to_speech`` returns.
    """
    if audio_input:
        # Audio takes precedence over typed text.
        # NOTE(review): audio_to_text reports failures as ordinary strings
        # (e.g. "Could not understand audio"), so a failed transcription is
        # forwarded to the model as if the user had said it.
        text_input = audio_to_text(audio_input)

    # Whitespace-only text is treated as "no input" rather than being sent to
    # the API as an empty prompt.
    if isinstance(text_input, str):
        text_input = text_input.strip()
    if not text_input:
        return "Error: No input provided.", None

    # Ask the model, then voice its answer.
    api_response = call_gemini_api(text_input)
    audio_response = text_to_speech(api_response)
    return api_response, audio_response
# Gradio Interface setup
# Web UI: two optional inputs (typed text, audio file) feed `respond`, whose
# two return values are shown as the reply text and the reply audio.
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Text Input", placeholder="Enter your message..."),
        # type="filepath": the audio input is handed to `respond` as a path,
        # which is what it passes on to audio_to_text.
        gr.Audio(type="filepath", label="Audio Input"),
    ],
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)
|