Spaces:

Asrar990
/

audio_chatbot

Sleeping

File size: 1,946 Bytes

import whisper  
from groq import Groq
import os
from gtts import gTTS
import tempfile
import gradio as gr

# Speech-to-text engine: the "base" Whisper checkpoint is loaded once at import
# time and shared by every call to transcribe_audio().
model = whisper.load_model("base")

# Groq API client; the key is read from the GROQ_API_KEY environment variable
# (the lookup yields None when the variable is unset).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

def transcribe_audio(audio_path):
    """Return the text Whisper recognizes in the audio at ``audio_path``.

    ``audio_path`` is handed straight to the module-level Whisper ``model``;
    only the ``"text"`` entry of its result is returned.
    """
    return model.transcribe(audio_path)["text"]

def get_llm_response(user_input, *, model_name="llama3-8b-8192"):
    """Send ``user_input`` to an LLM through Groq's chat API and return its reply.

    Args:
        user_input: Text sent as the single ``user`` message of the chat.
        model_name: Groq model identifier to query. Defaults to the model this
            function was previously hard-wired to, so existing callers are
            unaffected; pass another id to switch models without editing code.

    Returns:
        The ``message.content`` of the first completion choice.
        NOTE(review): the API may return ``None`` content — callers that need
        a string should check before use.
    """
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model=model_name,
        stream=False,  # wait for the full reply instead of streaming chunks
    )
    return chat_completion.choices[0].message.content

def text_to_speech(text):
    """Synthesize ``text`` with gTTS and return the path of the resulting MP3.

    Args:
        text: The text to speak.

    Returns:
        Path to a newly created ``.mp3`` temporary file. The file is created
        with ``delete=False``, so it is NOT removed automatically; cleanup is
        left to the caller.
    """
    tts = gTTS(text)
    # Reserve a unique .mp3 path, then close the handle immediately. Leaving it
    # open (as `NamedTemporaryFile(...).name` did) leaks a file descriptor and
    # can block the subsequent write on platforms that lock open files.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
        temp_audio_path = tmp_file.name
    tts.save(temp_audio_path)
    return temp_audio_path

def chatbot_pipeline(audio):
    """Run the full voice pipeline: transcription, LLM reply, text-to-speech.

    Args:
        audio: Path to the recorded/uploaded audio file, or ``None``/empty when
            the audio input has been cleared.

    Returns:
        A ``(transcription, llm_response, response_audio)`` tuple, where
        ``response_audio`` is the path of the synthesized reply or ``None``
        when there is nothing to speak.
    """
    # A live interface also fires when the input is cleared; there is nothing
    # to transcribe then, so return empty outputs instead of crashing.
    if not audio:
        return "", "", None

    # Step 1: Transcribe audio
    transcription = transcribe_audio(audio)

    # Silence or unintelligible audio yields blank text; skip the LLM call
    # rather than sending an empty prompt.
    if not transcription or not transcription.strip():
        return transcription, "", None

    # Step 2: Get LLM response
    llm_response = get_llm_response(transcription)

    # The speech synthesizer cannot speak empty text, so skip it in that case.
    if not llm_response or not llm_response.strip():
        return transcription, llm_response or "", None

    # Step 3: Convert response to speech
    response_audio = text_to_speech(llm_response)

    # Return transcription, LLM response, and audio
    return transcription, llm_response, response_audio

# Gradio UI: one audio input (passed to the pipeline as a file path) and three
# outputs matching the tuple returned by chatbot_pipeline().
interface = gr.Interface(
    title="Real-Time Voice-to-Voice Chatbot",
    description="Transcribe audio, interact with an LLM, and respond with audio in real-time.",
    fn=chatbot_pipeline,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="LLM Response"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()