Spaces:

MalikShehram
/

AUDIOTOAUDIO

Sleeping

App Files Files Community

AUDIOTOAUDIO / app.py

MalikShehram

Update app.py

579c50c verified over 1 year ago

raw

history blame contribute delete

2.33 kB

	import whisper
	from groq import Groq
	from gtts import gTTS
	import os
	import gradio as gr
	import tempfile
	import logging

	# Module-level setup: logging, the speech-to-text model and the LLM client.
	# DEBUG level is kept so the per-request debug messages below are emitted.
	logging.basicConfig(level=logging.DEBUG)

	# Load the "base" Whisper checkpoint once at import time so every request
	# reuses the same model instance.
	whisper_model = whisper.load_model("base")

	# SECURITY: the API key must never be committed to source control. It is read
	# from the GROQ_API_KEY environment variable (on a hosted deployment, set it
	# as a secret). Any key that was previously hard-coded in this file should be
	# treated as compromised and revoked.
	client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

	def process_audio(input_audio):
	    """Run one voice-to-voice turn: transcribe, query the LLM, synthesize speech.

	    Args:
	        input_audio: Path to the recorded/uploaded audio file, or a falsy
	            value (``None`` / empty string) when no audio was supplied.

	    Returns:
	        A ``(text, audio_path)`` tuple. On success ``text`` is the LLM reply
	        and ``audio_path`` is the path of an ``.mp3`` file containing the
	        spoken reply. On any failure ``text`` is ``"An error occurred: ..."``
	        and ``audio_path`` is ``None``; exceptions are never propagated to
	        the caller.
	    """
	    try:
	        # Fail fast with a clear message instead of an opaque downstream error
	        # when the handler is invoked without a recording.
	        if not input_audio:
	            raise ValueError("No audio input was provided.")

	        logging.debug("Received audio file: %s", input_audio)

	        # Transcribe audio with Whisper
	        transcription = whisper_model.transcribe(input_audio)
	        user_text = (transcription.get("text") or "").strip()
	        if not user_text:
	            # Covers a missing key, an empty string and whitespace-only text.
	            raise ValueError("Whisper failed to transcribe the audio.")
	        logging.debug("Transcription: %s", user_text)

	        # Interact with LLM via Groq API
	        response = client.chat.completions.create(
	            messages=[{"role": "user", "content": user_text}],
	            model="llama3-8b-8192",
	        )
	        if not response.choices:
	            raise ValueError("Groq API returned an empty response.")

	        llm_response = response.choices[0].message.content
	        logging.debug("LLM Response: %s", llm_response)

	        # The message content may be None as well as blank; check both before
	        # calling .strip() so the user sees the intended error message.
	        if not llm_response or not llm_response.strip():
	            raise ValueError("LLM response is empty or invalid.")

	        # Convert LLM response to speech with gTTS
	        tts = gTTS(llm_response)

	        # Reserve a unique path, then close the handle before writing to it so
	        # no file descriptor is leaked per request. delete=False keeps the
	        # file on disk for the caller to read.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_output:
	            output_path = temp_output.name
	        try:
	            tts.save(output_path)
	        except Exception:
	            # Do not leave an orphaned temp file behind when synthesis fails.
	            os.remove(output_path)
	            raise
	        logging.debug("Generated audio file: %s", output_path)

	        # Return response text and audio file path
	        return llm_response, output_path

	    except Exception as e:
	        # Top-level boundary for the UI callback: log with traceback and
	        # report the failure as text rather than raising.
	        logging.exception("Error in process_audio: %s", e)
	        return f"An error occurred: {str(e)}", None

	# Build the UI pieces first, then wire them into a single Gradio Interface.
	# The input component uses type="filepath" so the handler is given a file path.
	_audio_input = gr.Audio(type="filepath")
	_response_outputs = [
	    gr.Textbox(label="LLM Response"),
	    gr.Audio(label="Response Audio"),
	]

	interface = gr.Interface(
	    fn=process_audio,
	    inputs=_audio_input,
	    outputs=_response_outputs,
	    title="Real-Time Voice-to-Voice Chatbox",
	    description="Transcribes input audio, interacts with an LLM via Groq API, and generates audio responses.",
	)

	# Start serving the app, requesting a shareable link.
	interface.launch(share=True)