File size: 2,172 Bytes
3ecb667 90ec493 3ecb667 90ec493 3ecb667 90ec493 3ecb667 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import os
import whisper
from gtts import gTTS
from groq import Groq
import gradio as gr
# Groq API key: read it from the environment instead of embedding it in source.
# SECURITY: a secret written into a source file is exposed to everyone who can
# read the repository; any key that was ever committed must be revoked/rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast (before the expensive Whisper model load) with an actionable message.
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it (or configure it as a deployment "
        "secret) before starting the app."
    )

# Initialize Whisper model
model = whisper.load_model("base")

# Initialize Groq API client
client = Groq(api_key=GROQ_API_KEY)
# Step 1: Speech-to-text with Whisper
def transcribe_audio(audio_path):
    """Transcribe the audio at ``audio_path`` and return the recognized text.

    The module-level Whisper ``model`` performs the transcription; only the
    ``'text'`` entry of its result is returned.
    """
    return model.transcribe(audio_path)["text"]
# Step 2: Interact with LLM (Groq API)
def interact_with_llm(user_input, *, model_name="llama3-8b-8192"):
    """Send ``user_input`` to the Groq chat API and return the reply text.

    The request contains a single message with role ``"user"`` and is made
    without streaming through the module-level ``client``.

    Args:
        user_input: Text to send as the user message.
        model_name: Groq model identifier to query. Defaults to the model
            this app has always used, so existing callers are unaffected.
            NOTE(review): confirm this id is still served by Groq.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model=model_name,
        stream=False,
    )
    return chat_completion.choices[0].message.content
# Step 3: Convert Text to Speech using gTTS
def text_to_speech(text):
    """Synthesize ``text`` as English speech and return the path of the MP3.

    Each call writes to its own uniquely named temporary file. A single
    fixed filename would let overlapping requests in the web app overwrite
    one another's audio before it is served.

    Args:
        text: The text to speak.

    Returns:
        Filesystem path of the generated ``.mp3`` file. The file is not
        deleted here; the caller owns it.
    """
    import tempfile  # local import so the block does not depend on file-level imports

    tts = gTTS(text, lang="en")

    # Reserve a unique path, then close the handle so gTTS can open it by name.
    with tempfile.NamedTemporaryFile(
        prefix="response_", suffix=".mp3", delete=False
    ) as tmp:
        audio_file = tmp.name

    try:
        tts.save(audio_file)
    except Exception:
        # Do not leave an empty placeholder behind when synthesis fails.
        try:
            os.remove(audio_file)
        except OSError:
            pass
        raise
    return audio_file
# Combined workflow: Transcribe -> Interact with LLM -> Convert to Speech
def chatbot(audio):
    """Run one voice round-trip: transcribe, query the LLM, synthesize speech.

    Args:
        audio: Path of the recorded audio file, or ``None``/empty when there
            is no recording (e.g. the input was cleared in the UI).

    Returns:
        A ``(transcription, llm_response, audio_output)`` tuple, where
        ``audio_output`` is the path of the spoken reply. When there is
        nothing to process, the missing stages are returned as ``""`` and
        the audio as ``None`` instead of raising.
    """
    # No recording: skip the whole pipeline rather than passing None on.
    if not audio:
        return "", "", None

    # Step 1: Transcribe Audio to Text
    transcription = transcribe_audio(audio)
    if not transcription or not transcription.strip():
        # Silence / unintelligible audio: nothing meaningful to send to the LLM.
        return transcription or "", "", None

    # Step 2: Get LLM response based on transcription
    llm_response = interact_with_llm(transcription)
    if not llm_response or not llm_response.strip():
        # An empty reply cannot be synthesized; return the text results only.
        return transcription, llm_response or "", None

    # Step 3: Convert LLM response to audio (text-to-speech)
    audio_output = text_to_speech(llm_response)
    return transcription, llm_response, audio_output
# Gradio Interface setup
# Input component: the recording is handed to the callback as a file path.
_microphone_input = gr.Audio(type="filepath", label="Speak into the microphone")

# Output components, in the order returned by ``chatbot``.
_response_outputs = [
    "text",  # Transcription output
    "text",  # LLM response output
    gr.Audio(type="filepath", label="Response Audio"),  # Final audio output
]

interface = gr.Interface(
    fn=chatbot,
    inputs=_microphone_input,
    outputs=_response_outputs,
    live=True,
    title="Real-Time Voice-to-Voice Chatbot",
    description="Talk to an AI in real-time! Speak into the microphone, get a response, and hear it back.",
)

# Launch Gradio app (only when executed as a script, not on import)
if __name__ == "__main__":
    interface.launch()
|