File size: 1,907 Bytes
a0e5f05
 
 
 
 
2190138
aabe45e
2190138
6866c2d
2190138
 
aabe45e
a0e5f05
 
aabe45e
a0e5f05
 
 
aabe45e
2190138
 
 
a0e5f05
 
 
aabe45e
2190138
 
 
 
 
 
aabe45e
2190138
a0e5f05
2190138
 
 
aabe45e
a0e5f05
aabe45e
a0e5f05
 
aabe45e
2190138
aabe45e
a0e5f05
 
 
 
 
 
aabe45e
a0e5f05
 
 
 
 
 
aabe45e
2190138
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
import tempfile

# Load the Groq API key from the environment instead of embedding it in the
# source. On Hugging Face Spaces, add a secret named GROQ_API_KEY in the Space
# settings; locally, export the variable before starting the app.
# SECURITY: a literal key used to be committed on this line. Anything that was
# ever committed must be treated as leaked and revoked/rotated in the Groq
# console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail at startup with an actionable message rather than on the first
    # chat request.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it as a Space secret or as an "
        "environment variable before launching the app."
    )

client = Groq(api_key=GROQ_API_KEY)

# Load the Whisper speech-to-text model once at import time so every request
# reuses the same instance.
whisper_model = whisper.load_model("base")

def transcribe_and_chat(audio_file):
    """Run one voice-chat turn: speech -> text -> LLM reply -> speech.

    The recording is transcribed with the module-level Whisper model, the
    transcription is sent as a single user message to the Groq chat model,
    and the reply is synthesised to an MP3 file with gTTS.

    Each stage is guarded separately so that a late failure does not discard
    the results of the stages that already succeeded.

    Args:
        audio_file: Path to the recorded audio file, or ``None``/empty when
            nothing was recorded.

    Returns:
        A 3-tuple ``(user_text, bot_text, reply_audio)`` matching the three
        Gradio outputs. ``reply_audio`` is the path of the generated MP3, or
        ``None`` whenever no audio could be produced; in that case the two
        text fields carry a status or error message for the user.
    """
    if not audio_file:
        return "No audio received", "Error: Please record again.", None

    # 1. Transcribe. This is a UI boundary, so any failure is reported to the
    # user as text instead of propagating into Gradio.
    try:
        result = whisper_model.transcribe(audio_file)
    except Exception as e:
        return "Transcription error", str(e), None

    user_text = (result.get("text") or "").strip()
    if not user_text:
        return "Could not transcribe", "Error: Empty transcription", None

    # 2. Query Groq. On failure keep the transcription visible and report the
    # error in the reply box.
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_text}],
            model="llama-3.1-8b-instant",  # stable model
        )
        bot_text = chat_completion.choices[0].message.content
    except Exception as e:
        return user_text, f"LLM error: {e}", None

    if not bot_text:
        # gTTS cannot synthesise empty text, so stop here with a clear message.
        return user_text, "Error: Empty reply from model", None

    # 3. TTS. Reserve a temp path and close the handle *before* gTTS writes to
    # it: writing to a path that is still open fails on Windows. The file is
    # intentionally not deleted on success because Gradio reads it afterwards.
    reply_audio = None
    try:
        tts = gTTS(bot_text)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            reply_audio = tmp.name
        tts.save(reply_audio)
    except Exception as e:
        # Remove the unusable temp file, then still show the text reply.
        if reply_audio is not None:
            try:
                os.remove(reply_audio)
            except OSError:
                pass
        return user_text, f"{bot_text}\n\n(Audio unavailable: {e})", None

    return user_text, bot_text, reply_audio

# Gradio UI: one microphone input, two text boxes (transcription and LLM
# reply) and an audio player for the synthesised answer. The emoji in the
# labels were stored as mis-decoded bytes and are restored here.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Real-time Voice-to-Voice Chatbot (Whisper + Groq + gTTS)")

    audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak here")
    user_text_out = gr.Textbox(label="📝 Transcription (You said)")
    bot_text_out = gr.Textbox(label="🤖 LLM Reply")
    audio_out = gr.Audio(label="🔊 Bot's Voice", type="filepath")

    # The button runs the whole pipeline and fills the three outputs in order.
    btn = gr.Button("▶️ Process")
    btn.click(
        fn=transcribe_and_chat,
        inputs=[audio_in],
        outputs=[user_text_out, bot_text_out, audio_out],
    )

# Start the server only when executed as a script, so importing this module
# (for example to reuse `demo`) does not block on launch().
if __name__ == "__main__":
    demo.launch()