# Audio-to-Audio / app.py
# Author: Subayyal — commit 80c64db (verified)
import os
import streamlit as st
from groq import Groq
from pathlib import Path
from pydub import AudioSegment
# ------------------------------
# Fetch API key from Secrets.
# NOTE: the secret is named "GrokAPI" but it is a Groq API key — keep the
# env var name as-is so existing deployments keep working.
api_key = os.environ.get("GrokAPI")
if not api_key:
    # Fail fast with a visible message instead of crashing later on auth.
    st.error("❌ Please set GrokAPI secret")
    st.stop()
client = Groq(api_key=api_key)
# ------------------------------
# Page header and audio input widget.
st.title("🎀 Audio β†’ AI Text β†’ Speech")
audio_file = st.file_uploader("Upload audio", type=["wav", "m4a"])
def truncate_text(text, max_chars=1000):
    """Cap *text* at *max_chars* characters so it stays safe for TTS.

    Text at or under the limit is returned unchanged; longer text is cut
    at the limit and a truncation notice is appended.
    """
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "\n\n⚠️ Answer truncated for TTS."
if audio_file:
    try:
        # ------------------------------
        # Normalize the upload to WAV so the transcription API receives a
        # known format regardless of whether the user sent .wav or .m4a.
        audio_path = Path("input.wav")
        audio_segment = AudioSegment.from_file(audio_file)
        audio_segment.export(audio_path, format="wav")
        # ------------------------------
        # Speech -> text (Whisper). response_format="text" returns a plain
        # string rather than a structured object.
        transcription = client.audio.transcriptions.create(
            file=("input.wav", audio_path.read_bytes()),
            model="whisper-large-v3",
            response_format="text",
        )
        st.text_area("πŸ“ Question", transcription, height=150)
        # ------------------------------
        # Generate AI answer (kept short so the TTS payload stays small).
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": transcription}],
            temperature=0.7,
            max_completion_tokens=150,  # short answer
        )
        answer_text = completion.choices[0].message.content
        st.text_area("πŸ’¬ AI Answer", answer_text, height=200)
        # ------------------------------
        # Truncate answer to a safe length for TTS.
        answer_text_limited = truncate_text(answer_text, max_chars=1000)
        # ------------------------------
        # Text -> speech, written to disk and played back in the page.
        speech_path = Path("answer.wav")
        response = client.audio.speech.create(
            model="playai-tts",
            voice="Aaliyah-PlayAI",
            response_format="wav",
            input=answer_text_limited,
        )
        # NOTE(review): stream_to_file is deprecated in newer groq SDKs in
        # favor of write_to_file — confirm installed SDK version before
        # switching.
        response.stream_to_file(speech_path)
        st.audio(str(speech_path), format="audio/wav")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user instead
        # of letting Streamlit show a traceback.
        st.error(f"❌ Error: {str(e)}")