Spaces:
Sleeping
Sleeping
import os
import tempfile

import gradio as gr
import pyttsx3
import requests
import speech_recognition as sr
# Gemini API configuration.
# The key is read from the GEMINI_API_KEY environment variable so the secret
# never lives in source control. Any key that was previously committed to this
# file should be considered leaked and must be revoked and rotated.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
# REST endpoint for text generation. The model id is the path segment before
# ":generateContent"; swap it for any model the key has access to.
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
# Upper bound (seconds) on a single API call so a stalled connection cannot
# block the UI indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def _extract_generated_text(body):
    """Return the first candidate's text from a decoded response body.

    Falls back to "No response text" when the body does not have the
    candidates -> content -> parts -> text structure or the text is empty.
    """
    try:
        parts = body["candidates"][0]["content"]["parts"]
        text = "".join(part.get("text", "") for part in parts)
    except (AttributeError, IndexError, KeyError, TypeError):
        return "No response text"
    return text or "No response text"


def call_gemini_api(message):
    """Send *message* to the Gemini API and return the generated text.

    Args:
        message: The prompt text to send to the model.

    Returns:
        The generated text on success. On any failure (missing key, network
        error, non-200 status, undecodable body) a human-readable string
        starting with "Error" is returned instead; this function does not
        raise for those cases.
    """
    if not API_KEY:
        # Fail fast with a clear message instead of sending an unauthenticated request.
        return "Error: GEMINI_API_KEY environment variable is not set."

    headers = {
        # API keys are passed in this header; "Authorization: Bearer" is for OAuth tokens.
        "x-goog-api-key": API_KEY,
        "Content-Type": "application/json",
    }
    payload = {
        "contents": [{"parts": [{"text": message}]}],
        "generationConfig": {"maxOutputTokens": 100},
    }

    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        return f"Error occurred while calling API: {str(e)}"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    try:
        body = response.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        return f"Error occurred while calling API: {str(e)}"
    return _extract_generated_text(body)
| # Convert text to speech (TTS) | |
def text_to_speech(text):
    """Synthesize *text* to an audio file and return the file's path.

    Each call writes to its own uniquely named temporary file, so concurrent
    requests cannot overwrite one another's audio (the previous fixed
    "response.mp3" name was shared by every caller).

    Args:
        text: The text to speak. Empty or None yields no audio.

    Returns:
        Path of the written audio file, or None if *text* is empty or
        synthesis failed. The caller owns the file; it is not deleted here.
    """
    if not text:
        return None

    audio_filename = None
    try:
        engine = pyttsx3.init()
        # Reserve a unique path; the handle is closed right away so the TTS
        # engine can open the file itself.
        # NOTE(review): the on-disk encoding is chosen by the pyttsx3 driver;
        # confirm the ".mp3" suffix matches what the deployed driver writes.
        with tempfile.NamedTemporaryFile(
            prefix="response_", suffix=".mp3", delete=False
        ) as handle:
            audio_filename = handle.name
        engine.save_to_file(text, audio_filename)
        engine.runAndWait()
        return audio_filename
    except Exception as e:
        # Best-effort: TTS failure must not break the text response.
        print(f"Error with TTS: {e}")
        if audio_filename is not None:
            # Do not leave an empty or partial file behind.
            try:
                os.remove(audio_filename)
            except OSError:
                pass
        return None
| # Convert audio to text (ASR) | |
def audio_to_text(audio_path):
    """Transcribe the audio file at *audio_path* and return the text.

    The whole file is read into memory and passed to the recognizer's
    `recognize_google` method. Recognition failures are reported as a
    human-readable string rather than raised.

    Args:
        audio_path: Path to an audio file readable by `sr.AudioFile`.

    Returns:
        The transcript, or "Could not understand audio" /
        "Request error with the recognition service" on failure.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_path) as clip:
        captured = engine.record(clip)

    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        transcript = "Could not understand audio"
    except sr.RequestError:
        transcript = "Request error with the recognition service"
    return transcript
| # Define function for Gradio interface | |
def respond(text_input=None, audio_input=None):
    """Gradio handler: answer a typed or spoken message with text and audio.

    When *audio_input* is given it is transcribed and the transcript replaces
    *text_input*. Missing, empty, or whitespace-only input is rejected before
    any API call is made.

    Args:
        text_input: The typed message, or None.
        audio_input: Path to a recorded/uploaded audio file, or None.

    Returns:
        A `(response_text, response_audio_path)` tuple. The audio path is
        None when there was no input or speech synthesis failed.
    """
    if audio_input:
        # Audio takes precedence over any typed text.
        # NOTE(review): audio_to_text reports failures as plain strings, which
        # are non-empty and therefore get forwarded to the model as a prompt.
        text_input = audio_to_text(audio_input)

    # Whitespace-only input counts as missing.
    message = (text_input or "").strip()
    if not message:
        return "Error: No input provided.", None

    api_response = call_gemini_api(message)
    audio_response = text_to_speech(api_response)
    return api_response, audio_response
| # Gradio Interface setup | |
# Widgets on the input side: a free-text box and an audio recorder/uploader
# that hands the handler a file path.
_input_components = [
    gr.Textbox(label="Text Input", placeholder="Enter your message..."),
    gr.Audio(type="filepath", label="Audio Input"),
]

# Widgets on the output side: the model's reply as text and as audio.
_output_components = [
    gr.Textbox(label="Response Text"),
    gr.Audio(label="Response Audio"),
]

# Wire the handler to the widgets; positional order matches respond()'s
# parameters and its returned tuple.
demo = gr.Interface(
    fn=respond,
    inputs=_input_components,
    outputs=_output_components,
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)