# Voice chat assistant: Gradio UI + SpeechRecognition (STT) + gTTS (TTS) + Ollama via LangChain.
import os
import uuid

import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
# Initialize the model, prompt template, and history-aware runnable.
chat = Ollama(model="llama3:latest")

prompt = ChatPromptTemplate.from_messages([
    ("system", """
You are a helpful AI assistant. Your task is to engage in conversation with users,
answer their questions, and assist them with various tasks.
Communicate politely and maintain focus on the user's needs.
Keep responses concise, typically two to three sentences.
"""),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
])

runnable = prompt | chat

# BUG FIX: the original factory (`lambda session_id: ChatMessageHistory()`)
# returned a brand-new, empty history on every invocation, so the model never
# saw any earlier turns despite the MessagesPlaceholder. Keep one history
# object per session_id so conversational context actually accumulates.
_session_histories = {}

def _get_session_history(session_id):
    """Return the persistent ChatMessageHistory for *session_id*, creating it once."""
    if session_id not in _session_histories:
        _session_histories[session_id] = ChatMessageHistory()
    return _session_histories[session_id]

with_message_history = RunnableWithMessageHistory(
    runnable,
    _get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)
def text_to_speech(text, file_name):
    """Synthesize *text* to an English MP3 named *file_name* in the CWD; return its path."""
    speech = gTTS(text=text, lang='en', slow=False)
    target_path = os.path.join(os.getcwd(), file_name)
    speech.save(target_path)
    return target_path
def speech_to_text(audio):
    """Transcribe an audio file via Google's free recognizer.

    Returns the recognized text, or a human-readable error string on failure
    (the caller treats errors as ordinary chat input, so nothing is raised).
    """
    if audio is None:
        return "No audio input received."
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            captured = recognizer.record(source)
        transcript = recognizer.recognize_google(captured)
        print(transcript)
        return transcript
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio"
    except sr.RequestError:
        return "Could not request results from the speech recognition service"
    except Exception as e:
        return f"Error processing audio: {str(e)}"
def chat_function(input_type, text_input=None, audio_input=None, history=None):
    """Handle one chat turn, from text or from transcribed audio.

    Args:
        input_type: "text" or "audio" — selects which input to use.
        text_input: message typed by the user (when input_type == "text").
        audio_input: filepath of recorded audio (when input_type == "audio").
        history: list of (user, assistant) tuples; None starts a new one.

    Returns:
        (history, history, audio_path) — history twice because it feeds two
        chatbot outputs, plus the path to the TTS MP3 of the reply (None when
        no turn was taken).
    """
    if history is None:
        history = []

    if input_type == "text":
        user_input = text_input
    elif input_type == "audio":
        if audio_input is not None:
            user_input = speech_to_text(audio_input)
        else:
            user_input = "No audio input received."
    else:
        # Unknown input type: leave the conversation untouched.
        return history, history, None

    # BUG FIX: the original forwarded empty input (e.g. clicking "Send Text"
    # with a blank textbox) to the LLM and recorded it in history. Skip the
    # turn instead.
    if not user_input or not user_input.strip():
        return history, history, None

    print(f"User input: {user_input}")  # Debug information

    # Get the LLM response (the Ollama LLM wrapper returns a plain string).
    response = with_message_history.invoke(
        {"input": user_input},
        config={"configurable": {"session_id": "chat_history"}},
    )

    # Unique filename per reply so earlier responses aren't overwritten.
    audio_file = f"response_{uuid.uuid4()}.mp3"
    audio_path = text_to_speech(response, audio_file)

    # Update history in the (user, assistant) tuple format Gradio expects.
    history.append((user_input, response))
    return history, history, audio_path
# Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        text_input = gr.Textbox(placeholder="Type your message here...")
        audio_input = gr.Audio(sources=['microphone'], type="filepath")
    with gr.Row():
        text_button = gr.Button("Send Text")
        audio_button = gr.Button("Send Audio")
    audio_output = gr.Audio()

    def on_audio_change(audio):
        """Live-transcribe a finished recording into the textbox for review."""
        if audio is not None:
            return speech_to_text(audio)
        return ""

    audio_input.change(on_audio_change, inputs=[audio_input], outputs=[text_input])

    # BUG FIX: the original passed freshly constructed gr.Textbox(value="text")
    # / gr.Textbox(value="audio") components as click inputs; components built
    # inside a Blocks context get rendered, so two stray textboxes appeared in
    # the UI. gr.State carries the constant without rendering anything.
    text_mode = gr.State("text")
    audio_mode = gr.State("audio")

    text_button.click(
        chat_function,
        inputs=[text_mode, text_input, audio_input, chatbot],
        outputs=[chatbot, chatbot, audio_output],
    )
    audio_button.click(
        chat_function,
        inputs=[audio_mode, text_input, audio_input, chatbot],
        outputs=[chatbot, chatbot, audio_output],
    )

demo.launch(server_name='0.0.0.0', share=True, max_threads=10)