"""Voice-to-voice AI demo: speech -> Whisper STT -> Groq LLM -> Coqui TTS -> speech."""

import os
import tempfile

import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline
from groq import Groq
from TTS.api import TTS

# ----------------------------
# Load models (once, at startup)
# ----------------------------
# Whisper (Speech -> Text)
stt = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small"
)

# Groq client — fail fast with an actionable message instead of a bare KeyError.
_api_key = os.environ.get("GROQ_API_KEY")
if not _api_key:
    raise RuntimeError("GROQ_API_KEY environment variable is not set")
client = Groq(api_key=_api_key)

# Text -> Speech
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)

# ----------------------------
# Core function
# ----------------------------
def voice_to_voice(audio):
    """Transcribe *audio*, query the LLM, and synthesize the reply.

    Parameters
    ----------
    audio : str or None
        Filepath of the recorded audio (Gradio ``type="filepath"``);
        ``None`` when the user submits without recording.

    Returns
    -------
    tuple[str, str or None]
        (LLM reply text, path to the synthesized WAV file).

    Raises
    ------
    gr.Error
        If no audio was provided.
    """
    if audio is None:
        # Surface a friendly message in the UI instead of crashing in the pipeline.
        raise gr.Error("Please record some audio first.")

    # Speech -> Text
    text = stt(audio)["text"]

    # LLM response
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": text}]
    )
    reply = completion.choices[0].message.content

    # Text -> Speech. A unique temp file per request avoids clobbering
    # "response.wav" when multiple requests are in flight.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        output_path = f.name
    tts.tts_to_file(text=reply, file_path=output_path)

    return reply, output_path

# ----------------------------
# UI
# ----------------------------
ui = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(type="filepath", label="🎤 Speak"),
    outputs=[
        gr.Textbox(label="🧠 AI Response"),
        gr.Audio(label="🔊 Voice Reply")
    ],
    title="Voice to Voice AI (Groq + Hugging Face)",
    description="Speak → AI thinks → AI speaks back"
)

if __name__ == "__main__":
    ui.launch()