|
|
import os |
|
|
import whisper |
|
|
from gtts import gTTS |
|
|
from groq import Groq |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
# --- Module-level setup -------------------------------------------------
# SECURITY: a Groq API key was previously hard-coded on this line. A secret
# committed to source control is compromised and must be rotated; supply the
# key via the environment instead (e.g. `export GROQ_API_KEY=...`).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set. "
        "Export it before running this app."
    )

# Whisper "base" model: loaded once at import time and shared by all requests.
model = whisper.load_model("base")

# Groq client used for every chat completion in this app.
client = Groq(api_key=GROQ_API_KEY)
|
|
|
|
|
|
|
|
def transcribe_audio(audio_path):
    """Run Whisper speech-to-text on the audio file at *audio_path*.

    Returns the transcribed text as a string.
    """
    transcription = model.transcribe(audio_path)
    return transcription["text"]
|
|
|
|
|
|
|
|
def interact_with_llm(user_input):
    """Send *user_input* as a single-turn user message to the Groq LLM.

    Uses the llama3-8b-8192 model without streaming and returns the
    assistant's reply text.
    """
    messages = [{"role": "user", "content": user_input}]
    completion = client.chat.completions.create(
        messages=messages,
        model="llama3-8b-8192",
        stream=False,
    )
    return completion.choices[0].message.content
|
|
|
|
|
|
|
|
def text_to_speech(text):
    """Convert *text* to spoken English audio via gTTS.

    Returns the filesystem path of the generated MP3 file.

    Previously this always wrote to a fixed "response.mp3" in the current
    directory, so overlapping requests (likely with `live=True` in Gradio)
    would clobber each other's output. A unique temp file avoids that race.
    """
    import tempfile  # local import: only used here; keeps top-of-file imports unchanged

    tts = gTTS(text, lang="en")
    # delete=False: the file must outlive this function so Gradio can serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        audio_file = fp.name
    tts.save(audio_file)
    return audio_file
|
|
|
|
|
|
|
|
def chatbot(audio):
    """Full voice-to-voice pipeline: audio in -> text -> LLM -> audio out.

    *audio* is a filepath provided by the Gradio audio input. Returns a
    tuple of (transcription, llm_response, path_to_response_audio).
    """
    user_text = transcribe_audio(audio)
    reply = interact_with_llm(user_text)
    reply_audio = text_to_speech(reply)
    return user_text, reply, reply_audio
|
|
|
|
|
|
|
|
# Gradio UI wiring: microphone audio in; transcription, reply text, and the
# spoken reply out. `live=True` re-runs the pipeline as new audio arrives.
interface = gr.Interface(
    fn=chatbot,
    title="Real-Time Voice-to-Voice Chatbot",
    description=(
        "Talk to an AI in real-time! Speak into the microphone, "
        "get a response, and hear it back."
    ),
    inputs=gr.Audio(type="filepath", label="Speak into the microphone"),
    outputs=[
        "text",
        "text",
        gr.Audio(type="filepath", label="Response Audio"),
    ],
    live=True,
)


if __name__ == "__main__":
    # Start the local Gradio server only when run as a script.
    interface.launch()
|
|
|