File size: 1,528 Bytes
e41bced ea73a72 e41bced 47a8050 e41bced ea27122 e41bced ea73a72 e41bced ea73a72 e41bced | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | # app.py
import os
import whisper
from groq import Groq
from gtts import gTTS
import gradio as gr
import uuid
# Constants
MODEL_NAME = "llama3-70b-8192"  # Groq-hosted LLaMA 3 70B chat model id
# Load Whisper model once at import time so each request reuses the same
# in-memory model instead of paying the (slow) load cost per call.
whisper_model = whisper.load_model("base")
def process_audio(audio_filepath):
    """Transcribe an audio file, ask Groq LLaMA 3 for a reply, and speak it.

    Parameters
    ----------
    audio_filepath : str | None
        Path to the uploaded/recorded audio file. Gradio passes ``None``
        when the user submits without providing audio.

    Returns
    -------
    tuple[str, str]
        ``(transcribed user text, path to the generated MP3 reply)``.

    Raises
    ------
    ValueError
        If no audio was supplied, or ``GROQ_API_KEY`` is not configured.
    """
    # Guard: without this, a None path crashes deep inside Whisper with an
    # unhelpful traceback instead of a clear user-facing message.
    if not audio_filepath:
        raise ValueError("❌ No audio received. Please record or upload a file first.")

    # Step 1: Transcribe with Whisper (model loaded once at module level).
    # Whisper transcripts typically carry a leading space; strip for display.
    result = whisper_model.transcribe(audio_filepath)
    user_input = result["text"].strip()

    # Step 2: Get response from Groq. The key is read on every call so it can
    # be rotated in Hugging Face Secrets without restarting the Space.
    groq_api_key = os.environ.get("GROQ_API_KEY")
    if not groq_api_key:
        raise ValueError("❌ GROQ_API_KEY not found. Please set it in Hugging Face Secrets.")
    client = Groq(api_key=groq_api_key)
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model=MODEL_NAME,
    )
    bot_reply = chat_completion.choices[0].message.content

    # Step 3: Text-to-Speech. A random hex filename avoids collisions when
    # concurrent requests write into the same working directory.
    tts = gTTS(text=bot_reply, lang='en')
    response_audio_path = f"{uuid.uuid4().hex}_response.mp3"
    tts.save(response_audio_path)

    return user_input, response_audio_path
# Gradio Interface: one audio input, two outputs (transcript + spoken reply).
voice_input = gr.Audio(type="filepath", label="Upload your voice (.wav/.mp3)")
transcript_output = gr.Textbox(label="Transcribed Text")
reply_output = gr.Audio(label="AI Response")

iface = gr.Interface(
    fn=process_audio,
    inputs=voice_input,
    outputs=[transcript_output, reply_output],
    title="🎤 Groq AI Voice Assistant",
    description="Upload your voice file. It will be transcribed using Whisper, replied to by Groq LLaMA 3, and spoken back using Google TTS.",
)

# Launch only when run as a script (importing this module has no side effect
# beyond building the interface).
if __name__ == "__main__":
    iface.launch()
|