Spaces:
Build error
Build error
File size: 1,419 Bytes
9f451f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import gradio as gr
import torch
import soundfile as sf
from transformers import pipeline
from groq import Groq
from TTS.api import TTS
import os
# ----------------------------
# Load models
# ----------------------------
# Speech -> text: Whisper "small" checkpoint, wrapped in a transformers pipeline.
stt = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")

# Groq chat client. The key is read straight from the environment, so a
# missing GROQ_API_KEY raises KeyError here, at startup.
client = Groq(
    api_key=os.environ["GROQ_API_KEY"],
)

# Text -> speech: model selected by name; download progress bar disabled.
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
)
# ----------------------------
# Core function
# ----------------------------
def voice_to_voice(audio):
    """Transcribe a spoken prompt, get an LLM reply, and synthesize it as speech.

    Args:
        audio: Path to the recorded/uploaded audio file, or a falsy value
            (``None`` / empty string) when nothing was provided.

    Returns:
        A ``(reply_text, wav_path)`` tuple. When there is nothing to speak
        (no audio, no recognizable speech, or a blank model reply),
        ``reply_text`` is a short explanatory message and ``wav_path`` is
        ``None``.
    """
    # Bail out before touching any model: the recognizer cannot handle a
    # missing input, and there is nothing useful to send downstream.
    if not audio:
        return "No audio received. Please record or upload a clip.", None

    # Speech -> text
    text = stt(audio)["text"]
    if not text or not text.strip():
        # Silence or noise only: do not send an empty prompt to the LLM.
        return "No speech was detected in the recording.", None

    # LLM response
    # NOTE(review): confirm that this model ID is still served by the provider.
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": text}],
    )
    reply = completion.choices[0].message.content
    if not reply or not reply.strip():
        # Guard so the synthesizer is never handed None or blank text.
        return "The model returned an empty reply.", None

    # Text -> speech
    # The output path is fixed, so each call overwrites the previous reply file.
    output_path = "response.wav"
    tts.tts_to_file(text=reply, file_path=output_path)
    return reply, output_path
# ----------------------------
# UI
# ----------------------------
# Gradio front end: one audio input, with the text reply and the spoken
# reply as outputs. Labels use the intended emoji/arrow characters (the
# previous text was mis-decoded UTF-8).
ui = gr.Interface(
    fn=voice_to_voice,
    # type="filepath" hands the callback a path on disk.
    inputs=gr.Audio(type="filepath", label="🎤 Speak"),
    outputs=[
        gr.Textbox(label="🧠 AI Response"),
        gr.Audio(label="🔊 Voice Reply"),
    ],
    title="Voice to Voice AI (Groq + Hugging Face)",
    description="Speak → AI thinks → AI speaks back",
)

ui.launch()
|