# Gemini Audio Chatbot — Hugging Face Space app (Gradio + google-genai + gTTS)
# Standard library
import os
import tempfile
import time

# Third-party
import gradio as gr
from google import genai
from gtts import gTTS

# Configure the Gemini API client from the environment.
GOOGLE_API_KEY = os.getenv("gemini_api")  # set this secret in the Space settings
client = genai.Client(api_key=GOOGLE_API_KEY)
def transcribe_audio(audio_path):
    """Transcribe an audio file using the Gemini API.

    Args:
        audio_path: Filesystem path to the recorded audio clip.

    Returns:
        The transcription text produced by the model.
    """
    # Upload the clip so the model actually receives the audio bytes.
    # Passing the bare path string in `contents` would only send the
    # path itself as text, so the model could never hear the audio.
    audio_file = client.files.upload(file=audio_path)
    response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=['Transcribe the input audio & return the transcription only Example - Audio file is transcribed to Hello then just return Hello', audio_file]
    )
    print(response.text)
    return response.text
def text_to_speech(text):
    """Convert *text* to speech with gTTS and return the MP3 file path.

    The temporary file is intentionally not deleted here: Gradio serves
    the returned path to the browser after this function returns.
    """
    # mkstemp + close releases the OS handle before gTTS writes to the
    # path — saving to a still-open NamedTemporaryFile fails on Windows.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    gTTS(text=text, lang='en').save(path)
    return path
def chat_with_gemini(user_input, history):
    """Send *user_input* to Gemini, continuing the stored conversation.

    Args:
        user_input: The user's latest message.
        history: Flat list alternating [user, assistant, user, ...].

    Returns:
        Tuple of (response_text, updated history, path to spoken MP3).
    """
    if not history:
        history = []
    # Seed the chat with the prior turns. The original code created a
    # bare chat on every call, so the model never saw the history and
    # the bot had no conversational memory.
    past_turns = [
        {"role": "user" if i % 2 == 0 else "model", "parts": [{"text": msg}]}
        for i, msg in enumerate(history)
    ]
    chat = client.chats.create(model="gemini-2.0-flash", history=past_turns)
    response = chat.send_message(user_input)
    response_text = response.text
    # Record both sides of this exchange for the next turn.
    history.append(user_input)
    history.append(response_text)
    # Speak the reply as well.
    audio_path = text_to_speech(response_text)
    return response_text, history, audio_path
def process_audio(audio, history):
    """Transcribe a recorded clip and answer it via Gemini.

    Returns (response_text, history, audio_path); a placeholder tuple
    is returned when no recording was supplied.
    """
    if audio is None:
        return "No audio detected", history, None
    transcript = transcribe_audio(audio)
    return chat_with_gemini(transcript, history)
def process_text(text_input, history):
    """Forward typed input to Gemini, rejecting blank/whitespace text.

    Returns (response_text, history, audio_path).
    """
    if not text_input.strip():
        return "No input detected", history, None
    return chat_with_gemini(text_input, history)
def display_history(history):
    """Format the flat [user, assistant, ...] history for display.

    Args:
        history: Alternating user/assistant messages.

    Returns:
        A readable transcript string, or a placeholder when empty.
    """
    if not history:
        return "No conversation history yet."
    parts = []
    for i in range(0, len(history), 2):
        # `range` stops before len(history), so history[i] always
        # exists — the original `if i < len(history)` was dead code.
        parts.append(f"You: {history[i]}\n\n")
        if i + 1 < len(history):
            parts.append(f"Assistant: {history[i+1]}\n\n")
    # join instead of += to avoid quadratic string rebuilding.
    return "".join(parts)
# Create the Gradio interface
with gr.Blocks(title="Gemini Audio Chatbot") as demo:
    gr.Markdown("# Gemini Audio Chatbot")
    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")

    # Per-session conversation history: flat [user, assistant, ...] list.
    history = gr.State([])

    with gr.Row():
        with gr.Column(scale=7):
            # Running transcript of the conversation.
            chat_display = gr.Markdown("No conversation history yet.")
        with gr.Column(scale=3):
            # Info and instructions
            gr.Markdown("""
            ## How to use:
            1. Speak using the microphone or type your message
            2. Wait for the assistant's response
            3. The conversation history will be displayed on the left
            """)

    with gr.Row():
        # Audio input (microphone recording saved to a temp file path)
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Audio Input"
        )

    with gr.Row():
        # The instructions promise typed input and process_text exists,
        # but no textbox was wired up — add one (submits on Enter).
        text_input = gr.Textbox(label="Type your message", placeholder="Press Enter to send")

    with gr.Row():
        # Assistant's response
        response_text = gr.Textbox(label="Assistant's Response")

    with gr.Row():
        # Audio output
        audio_output = gr.Audio(label="Assistant's Voice")

    # Buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")

    # Recording finished -> transcribe + respond, then refresh transcript.
    audio_input.change(
        process_audio,
        inputs=[audio_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # Typed message submitted -> respond, then refresh transcript.
    text_input.submit(
        process_text,
        inputs=[text_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # Reset every output and the stored history.
    clear_btn.click(
        lambda: ([], "No conversation history yet.", "", None),
        outputs=[history, chat_display, response_text, audio_output]
    )

demo.launch()