# Hugging Face Space: real-time voice-to-voice chatbot (Whisper -> Groq LLM -> gTTS).
import io
import os
import tempfile
import wave

import gradio as gr
import numpy as np
import torch
import whisper
from groq import Groq
from gtts import gTTS
# Load the Whisper model on GPU when available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("base", device=device)

# SECURITY: never hard-code API keys in source. The previous revision both
# leaked a live Groq key AND then built the working client from a placeholder
# string, so every request would have been rejected. Read the key from the
# environment and fail fast with a clear message if it is missing.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before launching.")
client = Groq(api_key=GROQ_API_KEY)
# Function to transcribe audio using Whisper
def transcribe(audio_data):
    """Transcribe recorded audio to text with the local Whisper model.

    Args:
        audio_data: Either raw WAV bytes, or a ``(sample_rate, samples)``
            tuple as produced by ``gr.Audio(type="numpy")`` (the original
            ``f.write(audio_data)`` crashed on that tuple).

    Returns:
        The transcribed text, or a string starting with "Error" on failure.
    """
    audio_path = None
    try:
        # Unique temp file: a fixed "temp_audio.wav" name would collide
        # between concurrent requests.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)

        if isinstance(audio_data, tuple):
            # Gradio numpy input: write a proper WAV container.
            sample_rate, samples = audio_data
            if samples.dtype != np.int16:
                # Assume float samples in [-1, 1]; convert to 16-bit PCM.
                samples = np.clip(samples, -1.0, 1.0)
                samples = (samples * 32767.0).astype(np.int16)
            with wave.open(audio_path, "wb") as wf:
                wf.setnchannels(samples.shape[1] if samples.ndim > 1 else 1)
                wf.setsampwidth(2)  # int16 -> 2 bytes per sample
                wf.setframerate(int(sample_rate))
                wf.writeframes(samples.tobytes())
        else:
            # Raw bytes: assume a complete WAV payload.
            with open(audio_path, "wb") as f:
                f.write(audio_data)

        result = model.transcribe(audio_path)
        return result["text"]
    except Exception as e:
        return f"Error during transcription: {e}"
    finally:
        # Clean up even when transcription raised (the original leaked
        # the temp file on any exception).
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
# Function to get response from Groq's LLM
def get_llm_response(text):
    """Send *text* as a single user message to Groq's Llama model.

    Returns the model's reply text, or a string starting with "Error"
    when the API call fails.
    """
    try:
        completion = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": text}],
        )
    except Exception as e:
        return f"Error during LLM response generation: {e}"
    return completion.choices[0].message.content
# Function to convert text to speech
def text_to_speech(text):
    """Synthesize *text* to spoken English audio with gTTS.

    Returns the path to a generated MP3 file, or a string starting with
    "Error" on failure.
    """
    try:
        tts = gTTS(text, lang="en")
        # Unique temp file: the original fixed "response.mp3" name was
        # overwritten whenever two requests ran concurrently.
        fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        return f"Error during text-to-speech conversion: {e}"
# Combined function for processing audio input and generating audio output
def process_audio(audio_data):
    """Run the full voice pipeline: transcribe -> LLM -> text-to-speech.

    Returns a ``(transcription, llm_text, audio_path)`` tuple. Later
    stages are ``None`` when an earlier stage reported an error.
    """
    transcription = transcribe(audio_data)
    # startswith instead of the original substring test: a legitimate
    # transcription merely *containing* the word "Error" must not abort.
    if transcription.startswith("Error"):
        return transcription, None, None

    llm_response = get_llm_response(transcription)
    if llm_response.startswith("Error"):
        return transcription, llm_response, None

    # text_to_speech returns either a file path or an "Error..." string;
    # either way it is passed straight through (the original had two
    # identical return branches here).
    return transcription, llm_response, text_to_speech(llm_response)
# ---- Gradio UI -----------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")

    with gr.Row():
        with gr.Column():
            # Microphone/file input delivered as (sample_rate, ndarray).
            audio_input = gr.Audio(type="numpy", label="Speak", interactive=True)
        with gr.Column():
            transcription_output = gr.Textbox(label="Transcription (Text)", lines=2)
            response_output = gr.Textbox(label="Response (LLM Text)", lines=2)
            audio_output = gr.Audio(label="Response (Audio)")

    submit_button = gr.Button("Submit")

    # Wire the button to the full pipeline: one audio in, three outputs.
    submit_button.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[transcription_output, response_output, audio_output],
    )

# Launch the app
app.launch()