File size: 1,419 Bytes
9f451f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline
from groq import Groq
from TTS.api import TTS
import os

# ----------------------------
# Load models
# ----------------------------

# Everything in this section runs once at import time; the request handler
# reuses these module-level objects on every call.

# Speech → Text: Whisper checkpoint wrapped in a transformers ASR pipeline.
stt = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")

# Groq chat client. The API key is read from the GROQ_API_KEY environment
# variable; a missing variable raises KeyError right here at startup.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Text → Speech model (download progress bar disabled).
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
)

# ----------------------------
# Core function
# ----------------------------

def voice_to_voice(audio):
    """Transcribe a spoken prompt, ask the LLM for a reply, and voice that reply.

    Args:
        audio: Path of the input audio file, or ``None`` (or an empty string)
            when the request carries no audio.

    Returns:
        A ``(reply_text, audio_path)`` tuple. ``audio_path`` is the WAV file
        containing the spoken reply. When there is nothing to synthesise
        (no audio, no detected speech, or an empty model answer),
        ``audio_path`` is ``None`` and ``reply_text`` is a short explanatory
        message instead of a model answer.
    """
    # Nothing was recorded or uploaded: report it instead of letting the
    # speech-recognition pipeline fail on a missing input.
    if not audio:
        return "No audio received. Please record or upload a clip and try again.", None

    # Speech → Text
    text = stt(audio)["text"]
    if not text.strip():
        # Silence or unintelligible audio: do not send an empty prompt to the LLM.
        return "No speech was detected in the audio. Please try again.", None

    # LLM response
    # NOTE(review): confirm this model id is still served by Groq.
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": text}],
    )
    reply = completion.choices[0].message.content
    if not reply or not reply.strip():
        # An empty answer cannot be synthesised; skip the TTS step.
        return "The model returned an empty response. Please try again.", None

    # Text → Speech
    # NOTE(review): the output path is fixed, so every call overwrites the
    # previous reply and simultaneous requests would share this file.
    output_path = "response.wav"
    tts.tts_to_file(text=reply, file_path=output_path)

    return reply, output_path

# ----------------------------
# UI
# ----------------------------

# Single-function interface: one audio input feeds voice_to_voice, whose two
# return values fill the text box and the audio player, in that order.
ui = gr.Interface(
    fn=voice_to_voice,
    # type="filepath" hands the handler a path to the audio file.
    inputs=gr.Audio(type="filepath", label="🎤 Speak"),
    outputs=[
        gr.Textbox(label="🧠 AI Response"),
        gr.Audio(label="🔊 Voice Reply"),
    ],
    title="Voice to Voice AI (Groq + Hugging Face)",
    description="Speak → AI thinks → AI speaks back",
)

# Launches the app as soon as this module is executed.
ui.launch()