File size: 1,501 Bytes
c5c485b
c1acd01
c5c485b
c1acd01
 
c5c485b
 
c1acd01
03aa0cf
c1acd01
 
c5c485b
c1acd01
c5c485b
c1acd01
 
 
 
c5c485b
c1acd01
 
 
 
c5c485b
c1acd01
 
 
 
 
c5c485b
c1acd01
 
c5c485b
c1acd01
 
 
 
c5c485b
c1acd01
c5c485b
c1acd01
 
 
c5c485b
 
c1acd01
a27c2ba
c1acd01
 
c5c485b
c1acd01
 
 
c5c485b
c1acd01
 
 
 
 
c5c485b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import tempfile

import gradio as gr
from faster_whisper import WhisperModel
from groq import Groq
from gtts import gTTS

# One-time startup: load the STT model and create the LLM client.
# Both happen at import time so the Gradio callback is fast per-request.
print("Loading Whisper...")
# "base" model with int8 quantization keeps memory low enough for CPU hosts.
whisper_model = WhisperModel("base", compute_type="int8")

# Use HF secret instead of Colab cell
# NOTE: raises KeyError at startup if GROQ_API_KEY is not set — fail fast
# rather than failing on the first request.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

print("Ready")

def voice_ai(audio_path):
    """Full voice pipeline: speech-to-text -> LLM reply -> text-to-speech.

    Args:
        audio_path: Filesystem path to the recorded clip, or None when the
            user clicks Generate without recording anything.

    Returns:
        Tuple of (reply_audio_path, user_text, reply_text). On failure the
        audio slot is None and the error message goes in the reply box.
    """
    try:
        if audio_path is None:
            return None, "", ""

        # ---------------- STT
        segments, _ = whisper_model.transcribe(audio_path)
        user_text = "".join(seg.text for seg in segments).strip()
        print("User:", user_text)

        # Guard: gTTS raises on empty text, and there is no point in
        # sending a blank transcription to the LLM.
        if not user_text:
            return None, "", "No speech detected - please try again."

        # ---------------- LLM
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": user_text}]
        )

        reply = completion.choices[0].message.content
        print("AI:", reply)

        # ---------------- TTS
        # Unique temp file instead of a fixed "response.mp3": concurrent
        # Gradio requests would otherwise overwrite each other's audio.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            output_file = tmp.name
        tts = gTTS(reply)
        tts.save(output_file)

        return output_file, user_text, reply

    except Exception as e:
        # Top-level boundary for the Gradio callback: surface the error in
        # the UI instead of crashing the request.
        print("ERROR:", e)
        return None, "error", str(e)

# ---- UI: record audio, press Generate, get spoken reply + both transcripts.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙 Voice AI")

    mic_input = gr.Audio(type="filepath")
    generate_btn = gr.Button("Generate")

    reply_audio = gr.Audio()
    transcript_box = gr.Textbox(label="You said")
    reply_box = gr.Textbox(label="AI reply")

    # Wire the button to the pipeline: one audio input, three outputs.
    generate_btn.click(
        voice_ai,
        inputs=mic_input,
        outputs=[reply_audio, transcript_box, reply_box],
    )

demo.launch()