# app.py — real-time voice-to-voice chatbot (Whisper STT -> Groq LLaMA -> gTTS TTS)
import gradio as gr
import os
from groq import Groq
import whisper
import torch
from gtts import gTTS
import IPython.display as ipd
# Speech-to-text model size: "small" balances accuracy and memory;
# "base" is lighter, "medium" is more accurate but heavier.
model_name = "small"
# Pick the compute device first and load the model directly onto it,
# instead of loading on the default device and moving afterwards
# (avoids a redundant host-memory copy of the weights).
device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_model = whisper.load_model(model_name, device=device)
# Groq API client. SECURITY: the key must come from the environment —
# the previously hard-coded key was a committed credential leak and
# should be revoked/rotated. Set GROQ_API_KEY before launching.
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)
def transcribe_audio(audio_path):
    """Run Whisper speech-to-text on the file at *audio_path*.

    Returns the transcribed text, or a fixed error string if
    transcription fails for any reason.
    """
    try:
        print(f"Audio file path received for transcription: {audio_path}")
        # Whisper loads and decodes the audio file itself; no manual
        # preprocessing is needed here.
        transcription_result = whisper_model.transcribe(audio_path)
        print(f"Transcription result: {transcription_result}")
        return transcription_result['text']
    except Exception as exc:
        print(f"Error during transcription: {exc}")
        return "Error during transcription"
def get_llama_response(transcription):
    """Send *transcription* as a single user message to LLaMA via the
    Groq API and return the model's reply text.

    Returns a fixed error string if the API call fails.
    """
    conversation = [{"role": "user", "content": transcription}]
    try:
        completion = client.chat.completions.create(
            messages=conversation,
            model="llama3-8b-8192",
        )
        # Take the first (and only) candidate reply.
        return completion.choices[0].message.content
    except Exception as exc:
        print(f"Error during LLaMA response generation: {exc}")
        return "Error during response generation"
def text_to_speech(text):
    """Synthesize *text* to speech with gTTS, save it as response.mp3
    in the working directory, and return that file path.

    Returns a fixed error string if synthesis fails.
    """
    output_path = "response.mp3"
    try:
        gTTS(text).save(output_path)
        return output_path
    except Exception as exc:
        print(f"Error during text-to-speech conversion: {exc}")
        return "Error during text-to-speech conversion"
def chatbot(audio_path):
    """Full pipeline for the Gradio UI: recorded audio file ->
    transcription -> LLM reply -> synthesized speech.

    Returns (transcription, reply text, path to the reply audio file).
    """
    transcription = transcribe_audio(audio_path)
    reply = get_llama_response(transcription)
    speech_path = text_to_speech(reply)
    return transcription, reply, speech_path
# Build the Gradio UI: a single audio input (delivered to the handler as
# a file path) and three outputs — transcription, LLM reply, spoken reply.
interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "text", "audio"],
    live=True,  # re-run the pipeline as soon as a recording arrives
    description="Real-time Voice-to-Voice Chatbot",
)

# Start the web server (blocks until interrupted).
interface.launch()