# Gemini Audio Chatbot — Hugging Face Space app (Gradio + google-genai + gTTS)
# Standard library
import os
import tempfile
import time

# Third-party
import gradio as gr
from google import genai
from gtts import gTTS

# Configure the Gemini API client from the environment.
GOOGLE_API_KEY = os.getenv("gemini_api")  # set this secret in the Space settings
client = genai.Client(api_key=GOOGLE_API_KEY)
def transcribe_audio(audio_path):
    """Transcribe an audio file using the Gemini API.

    Args:
        audio_path: Filesystem path to the recorded audio clip.

    Returns:
        The transcription text produced by the model.
    """
    # Upload the clip so the model actually receives the audio bytes.
    # Passing the bare path string in `contents` would only send the
    # path itself as text, so the model could never hear the audio.
    audio_file = client.files.upload(file=audio_path)
    response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=['Transcribe the input audio & return the transcription only Example - Audio file is transcribed to Hello then just return Hello', audio_file]
    )
    print(response.text)
    return response.text
def text_to_speech(text):
    """Convert *text* to speech with gTTS and return the MP3 file path.

    The temporary file is intentionally not deleted here: Gradio serves
    the returned path to the browser after this function returns.
    """
    # mkstemp + close releases the OS handle before gTTS writes to the
    # path — saving to a still-open NamedTemporaryFile fails on Windows.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    gTTS(text=text, lang='en').save(path)
    return path
def chat_with_gemini(user_input, history):
    """Send *user_input* to Gemini, continuing the stored conversation.

    Args:
        user_input: The user's latest message.
        history: Flat list alternating [user, assistant, user, ...].

    Returns:
        Tuple of (response_text, updated history, path to spoken MP3).
    """
    if not history:
        history = []
    # Seed the chat with the prior turns. The original code created a
    # bare chat on every call, so the model never saw the history and
    # the bot had no conversational memory.
    past_turns = [
        {"role": "user" if i % 2 == 0 else "model", "parts": [{"text": msg}]}
        for i, msg in enumerate(history)
    ]
    chat = client.chats.create(model="gemini-2.0-flash", history=past_turns)
    response = chat.send_message(user_input)
    response_text = response.text
    # Record both sides of this exchange for the next turn.
    history.append(user_input)
    history.append(response_text)
    # Speak the reply as well.
    audio_path = text_to_speech(response_text)
    return response_text, history, audio_path
def process_audio(audio, history):
    """Transcribe a recorded clip and answer it via Gemini.

    Returns (response_text, history, audio_path); a placeholder tuple
    is returned when no recording was supplied.
    """
    if audio is None:
        return "No audio detected", history, None
    transcript = transcribe_audio(audio)
    return chat_with_gemini(transcript, history)
def process_text(text_input, history):
    """Forward typed input to Gemini, rejecting blank/whitespace text.

    Returns (response_text, history, audio_path).
    """
    if not text_input.strip():
        return "No input detected", history, None
    return chat_with_gemini(text_input, history)
def display_history(history):
    """Format the flat [user, assistant, ...] history for display.

    Args:
        history: Alternating user/assistant messages.

    Returns:
        A readable transcript string, or a placeholder when empty.
    """
    if not history:
        return "No conversation history yet."
    parts = []
    for i in range(0, len(history), 2):
        # `range` stops before len(history), so history[i] always
        # exists — the original `if i < len(history)` was dead code.
        parts.append(f"You: {history[i]}\n\n")
        if i + 1 < len(history):
            parts.append(f"Assistant: {history[i+1]}\n\n")
    # join instead of += to avoid quadratic string rebuilding.
    return "".join(parts)
# Create the Gradio interface
with gr.Blocks(title="Gemini Audio Chatbot") as demo:
    gr.Markdown("# Gemini Audio Chatbot")
    gr.Markdown("Talk or type your message, and the assistant will respond with text and audio.")

    # Per-session conversation history: flat [user, assistant, ...] list.
    history = gr.State([])

    with gr.Row():
        with gr.Column(scale=7):
            # Running transcript of the conversation.
            chat_display = gr.Markdown("No conversation history yet.")
        with gr.Column(scale=3):
            # Info and instructions
            gr.Markdown("""
            ## How to use:
            1. Speak using the microphone or type your message
            2. Wait for the assistant's response
            3. The conversation history will be displayed on the left
            """)

    with gr.Row():
        # Audio input (microphone recording saved to a temp file path)
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Audio Input"
        )

    with gr.Row():
        # The instructions promise typed input and process_text exists,
        # but no textbox was wired up — add one (submits on Enter).
        text_input = gr.Textbox(label="Type your message", placeholder="Press Enter to send")

    with gr.Row():
        # Assistant's response
        response_text = gr.Textbox(label="Assistant's Response")

    with gr.Row():
        # Audio output
        audio_output = gr.Audio(label="Assistant's Voice")

    # Buttons
    with gr.Row():
        clear_btn = gr.Button("Clear Conversation")

    # Recording finished -> transcribe + respond, then refresh transcript.
    audio_input.change(
        process_audio,
        inputs=[audio_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # Typed message submitted -> respond, then refresh transcript.
    text_input.submit(
        process_text,
        inputs=[text_input, history],
        outputs=[response_text, history, audio_output]
    ).then(
        display_history,
        inputs=[history],
        outputs=[chat_display]
    )

    # Reset every output and the stored history.
    clear_btn.click(
        lambda: ([], "No conversation history yet.", "", None),
        outputs=[history, chat_display, response_text, audio_output]
    )

demo.launch()