# Audio-to-Audio — app.py
# (Hugging Face Space page header — SabaAnver, commit 1a3bef4 — converted
# to comments so the module parses as Python.)
import os
from pathlib import Path
import gradio as gr
from groq import Groq
# Load API key from Hugging Face Secret.
# NOTE(review): secret name is "GroqApiKey"; os.getenv returns None when the
# secret is unset, in which case the Groq client is handed api_key=None —
# presumably it then falls back to its own GROQ_API_KEY lookup or raises at
# first call; confirm against the SDK.
api_key = os.getenv("GroqApiKey")
# Initialize Groq client (module-level; shared by every request handler).
client = Groq(api_key=api_key)
def ask_ai(audio_file):
    """Answer a spoken question: transcribe it, query the LLM, speak the reply.

    Pipeline (all calls go through the module-level Groq ``client``):
      1. Speech-to-text with ``whisper-large-v3``.
      2. Chat completion with ``llama-3.1-8b-instant``.
      3. Text-to-speech with ``playai-tts``, written to ``answer.wav``.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path to the recorded/uploaded audio (Gradio
        ``type="filepath"``). Gradio passes ``None`` when the user submits
        without recording or uploading anything.

    Returns
    -------
    tuple[str, str, str | None]
        ``(transcribed_question, answer_text, answer_audio_path)``; on any
        failure ``("Error processing your request.", <detail>, None)`` so the
        three Gradio outputs always receive values.
    """
    # Guard: without this, open(None) raises TypeError and the user sees an
    # opaque interpreter message instead of a useful one.
    if audio_file is None:
        return "Error processing your request.", "No audio was provided.", None
    try:
        # 1. Speech-to-Text
        with open(audio_file, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=("user_input.wav", file.read()),
                model="whisper-large-v3",
                response_format="verbose_json",
            )
        user_text = transcription.text

        # 2. LLM Completion
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": user_text}],
            temperature=1,
            max_completion_tokens=512,
            top_p=1,
        )
        answer_text = completion.choices[0].message.content

        # 3. Text-to-Speech
        speech_file_path = Path("answer.wav")
        response = client.audio.speech.create(
            model="playai-tts",
            voice="Calum-PlayAI",
            response_format="wav",
            input=answer_text,
        )
        # Stream the binary TTS payload to disk chunk by chunk.
        with open(speech_file_path, "wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)

        return user_text, answer_text, str(speech_file_path)
    except Exception as e:
        # Broad catch is deliberate for a Gradio handler: surface any
        # API/network failure in the UI rather than crashing the worker.
        return "Error processing your request.", str(e), None
# Gradio Interface — build the widgets first, then wire them together.
question_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Ask me a question (record or upload audio)",
)
answer_outputs = [
    gr.Textbox(label="Transcribed Question"),
    gr.Textbox(label="AI Answer"),
    gr.Audio(label="Answer Audio"),
]
ui = gr.Interface(
    fn=ask_ai,
    inputs=question_input,
    outputs=answer_outputs,
    title="🎤 Voice Q&A with Groq AI",
    description="Record or upload an audio file, get an AI-generated spoken answer.",
)

# Launch only when executed as a script.
if __name__ == "__main__":
    ui.launch()