Spaces:
Running
Running
File size: 1,501 Bytes
c5c485b c1acd01 c5c485b c1acd01 c5c485b c1acd01 03aa0cf c1acd01 c5c485b c1acd01 c5c485b c1acd01 c5c485b c1acd01 c5c485b c1acd01 c5c485b c1acd01 c5c485b c1acd01 c5c485b c1acd01 c5c485b c1acd01 c5c485b c1acd01 a27c2ba c1acd01 c5c485b c1acd01 c5c485b c1acd01 c5c485b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | import gradio as gr
from faster_whisper import WhisperModel
from groq import Groq
from gtts import gTTS
import os
print("Loading Whisper...")
# Load the Whisper "base" model once at startup; int8 quantization keeps the
# memory footprint small enough for a CPU-only Space.
whisper_model = WhisperModel("base", compute_type="int8")
# Use HF secret instead of Colab cell
# NOTE: raises KeyError at startup if the GROQ_API_KEY secret is not set —
# failing fast here is preferable to failing on the first request.
client = Groq(api_key=os.environ["GROQ_API_KEY"])
print("Ready")
def voice_ai(audio_path):
    """Run the full voice pipeline: speech-to-text -> LLM -> text-to-speech.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path to the recorded clip (Gradio ``type="filepath"``),
        or ``None`` when nothing has been recorded.

    Returns
    -------
    tuple[str | None, str, str]
        ``(mp3_path, user_text, ai_reply)``. On any unexpected failure
        returns ``(None, "error", <error message>)`` so the UI shows the
        problem instead of the callback crashing.
    """
    try:
        if audio_path is None:
            # Nothing recorded yet — keep all outputs blank.
            return None, "", ""

        # ---------------- STT
        segments, _ = whisper_model.transcribe(audio_path)
        # transcribe() yields segments lazily; concatenate into one string.
        user_text = "".join(seg.text for seg in segments).strip()
        print("User:", user_text)
        if not user_text:
            # Guard: gTTS raises on empty text, and there is no point
            # sending an empty prompt to the LLM.
            return None, "", "No speech detected - please try again."

        # ---------------- LLM
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": user_text}],
        )
        # .content may be None (e.g. refusals); normalize to a string.
        reply = completion.choices[0].message.content or ""
        print("AI:", reply)
        if not reply.strip():
            # Same gTTS empty-text guard for a blank model response.
            return None, user_text, "(empty reply from model)"

        # ---------------- TTS
        output_file = "response.mp3"
        tts = gTTS(reply)
        tts.save(output_file)

        return output_file, user_text, reply
    except Exception as e:
        # Top-level boundary for the Gradio callback: log and surface the
        # error in the UI rather than letting the event handler die.
        print("ERROR:", e)
        return None, "error", str(e)
# Build the web UI: one mic input, one trigger button, three outputs
# (synthesized speech, the transcription, and the model's text reply).
with gr.Blocks() as app:
    gr.Markdown("# 🎙 Voice AI")

    # Inputs.
    audio_in = gr.Audio(type="filepath")
    generate_btn = gr.Button("Generate")

    # Outputs.
    reply_audio = gr.Audio()
    transcript_box = gr.Textbox(label="You said")
    reply_box = gr.Textbox(label="AI reply")

    # Wire the button to the pipeline; output order must match voice_ai's
    # return tuple (mp3 path, user text, ai reply).
    generate_btn.click(
        voice_ai,
        inputs=audio_in,
        outputs=[reply_audio, transcript_box, reply_box],
    )

app.launch()
|