import os
from pathlib import Path

import gradio as gr
from groq import Groq

# Load API key from the environment (Hugging Face Secret).
api_key = os.getenv("GroqApiKey")
if not api_key:
    # Fail fast with a clear message instead of an opaque auth error
    # on the first API call.
    raise RuntimeError("Missing 'GroqApiKey' environment variable (Hugging Face Secret).")

# Initialize Groq client once at module load.
client = Groq(api_key=api_key)

# Output path for the synthesized answer (overwritten on every request).
ANSWER_PATH = Path("answer.wav")


def ask_ai(audio_file):
    """Voice Q&A pipeline: transcribe audio, answer with an LLM, speak the answer.

    Parameters
    ----------
    audio_file : str | None
        Filesystem path to the recorded/uploaded audio clip
        (Gradio ``type="filepath"``); ``None`` when nothing was provided.

    Returns
    -------
    tuple[str, str, str | None]
        ``(transcribed question, AI answer text, path to answer audio)``.
        On failure the first element is a user-facing notice, the second
        carries the error detail, and the audio path is ``None``.
    """
    # Guard: Gradio passes None when no audio was recorded or uploaded.
    if not audio_file:
        return "Error processing your request.", "No audio input provided.", None

    try:
        # 1. Speech-to-Text (Whisper).
        with open(audio_file, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=("user_input.wav", file.read()),
                model="whisper-large-v3",
                response_format="verbose_json",
            )
        user_text = transcription.text

        # 2. LLM completion on the transcribed question.
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": user_text}],
            temperature=1,
            max_completion_tokens=512,
            top_p=1,
        )
        answer_text = completion.choices[0].message.content

        # 3. Text-to-Speech for the answer.
        response = client.audio.speech.create(
            model="playai-tts",
            voice="Calum-PlayAI",
            response_format="wav",
            input=answer_text,
        )

        # Stream the audio bytes to disk in chunks.
        with open(ANSWER_PATH, "wb") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)

        return user_text, answer_text, str(ANSWER_PATH)

    except Exception as e:
        # Top-level UI boundary: surface the failure to the user rather
        # than crashing the Gradio worker.
        return "Error processing your request.", str(e), None


# Gradio interface: audio in -> (transcript, answer text, answer audio) out.
ui = gr.Interface(
    fn=ask_ai,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="Ask me a question (record or upload audio)",
    ),
    outputs=[
        gr.Textbox(label="Transcribed Question"),
        gr.Textbox(label="AI Answer"),
        gr.Audio(label="Answer Audio"),
    ],
    title="🎤 Voice Q&A with Groq AI",
    description="Record or upload an audio file, get an AI-generated spoken answer.",
)

if __name__ == "__main__":
    ui.launch()