"""Streamlit app: upload audio → transcribe (Whisper) → LLM answer → spoken reply (Groq TTS)."""

import os
import tempfile
from pathlib import Path

import streamlit as st
from groq import Groq
from pydub import AudioSegment

# ------------------------------
# Fetch API key from Secrets
api_key = os.environ.get("GrokAPI")
if not api_key:
    st.error("āŒ Please set GrokAPI secret")
    st.stop()

client = Groq(api_key=api_key)

# ------------------------------
st.title("šŸŽ¤ Audio → AI Text → Speech")

audio_file = st.file_uploader("Upload audio", type=["wav", "m4a"])


def truncate_text(text: str, max_chars: int = 1000) -> str:
    """Limit text size for TTS.

    Returns *text* unchanged when it fits within *max_chars*; otherwise
    returns the first *max_chars* characters followed by a truncation notice.
    """
    if len(text) > max_chars:
        return text[:max_chars] + "\n\nāš ļø Answer truncated for TTS."
    return text


if audio_file:
    try:
        # Fix: the original wrote fixed names ("input.wav"/"answer.wav") into
        # the CWD, so concurrent Streamlit sessions clobbered each other's
        # files and nothing was cleaned up. A per-run temp directory gives
        # each session private paths and automatic cleanup.
        with tempfile.TemporaryDirectory() as tmp_dir:
            # ------------------------------
            # Convert uploaded audio to WAV (pydub handles m4a/wav decoding)
            audio_path = Path(tmp_dir) / "input.wav"
            audio_segment = AudioSegment.from_file(audio_file)
            audio_segment.export(audio_path, format="wav")

            # ------------------------------
            # Transcribe audio
            transcription = client.audio.transcriptions.create(
                file=("input.wav", audio_path.read_bytes()),
                model="whisper-large-v3",
                response_format="text",
            )
            st.text_area("šŸ“ Question", transcription, height=150)

            # ------------------------------
            # Generate AI answer (shorter)
            completion = client.chat.completions.create(
                model="llama-3.1-8b-instant",
                messages=[{"role": "user", "content": transcription}],
                temperature=0.7,
                max_completion_tokens=150,  # short answer
            )
            answer_text = completion.choices[0].message.content
            st.text_area("šŸ’¬ AI Answer", answer_text, height=200)

            # ------------------------------
            # Truncate answer to safe length for TTS
            answer_text_limited = truncate_text(answer_text, max_chars=1000)

            # ------------------------------
            # Convert text → speech
            speech_path = Path(tmp_dir) / "answer.wav"
            response = client.audio.speech.create(
                model="playai-tts",
                voice="Aaliyah-PlayAI",
                response_format="wav",
                input=answer_text_limited,
            )
            response.stream_to_file(speech_path)

            # Pass the bytes rather than the path so playback does not
            # depend on the temp file outliving this block.
            st.audio(speech_path.read_bytes(), format="audio/wav")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user.
        st.error(f"āŒ Error: {str(e)}")