# Voice-to-voice assistant: Whisper STT -> Groq LLM -> Coqui TTS.
# Provenance: Hugging Face Space by AmirAziz1221, commit 9f451f4 ("Create app.py").
import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline
from groq import Groq
from TTS.api import TTS
import os
# ----------------------------
# Model setup (runs once at import time)
# ----------------------------

# Speech -> text: OpenAI Whisper (small checkpoint) via the HF ASR pipeline.
stt = pipeline("automatic-speech-recognition", model="openai/whisper-small")

# LLM backend: Groq API client; reads the GROQ_API_KEY environment variable
# (raises KeyError at import if it is missing, which surfaces misconfiguration early).
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Text -> speech: Coqui TTS, Tacotron2-DDC voice trained on LJSpeech.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
# ----------------------------
# Core function
# ----------------------------
def voice_to_voice(audio):
    """Run the full voice pipeline: transcribe, query the LLM, synthesize a reply.

    Args:
        audio: Filesystem path to the recorded audio clip (Gradio
            ``type="filepath"``), or ``None`` when the user submitted
            without recording anything.

    Returns:
        A ``(reply_text, audio_path)`` tuple: the LLM's textual answer and
        the path to the synthesized WAV file.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a friendly
            message instead of an opaque traceback from ``stt(None)``.
    """
    # Guard: Gradio passes None when the mic input is empty.
    if audio is None:
        raise gr.Error("Please record or upload some audio first.")

    # Speech -> text via the Whisper pipeline.
    text = stt(audio)["text"]

    # Text -> LLM reply via Groq.
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": text}]
    )
    reply = completion.choices[0].message.content

    # Reply text -> speech. NOTE(review): fixed output path means concurrent
    # requests overwrite each other's file — acceptable for a single-user demo.
    output_path = "response.wav"
    tts.tts_to_file(text=reply, file_path=output_path)
    return reply, output_path
# ----------------------------
# UI
# ----------------------------
# ----------------------------
# Gradio UI: one audio input, text + audio outputs.
# ----------------------------
ui = gr.Interface(
    fn=voice_to_voice,
    inputs=gr.Audio(type="filepath", label="🎀 Speak"),
    outputs=[
        gr.Textbox(label="🧠 AI Response"),
        gr.Audio(label="πŸ”Š Voice Reply"),
    ],
    title="Voice to Voice AI (Groq + Hugging Face)",
    description="Speak β†’ AI thinks β†’ AI speaks back",
)

# Guard the launch so importing this module (e.g. for tests) does not
# start a web server; running the file as a script still launches the app.
if __name__ == "__main__":
    ui.launch()