Spaces:
Sleeping
Sleeping
| import os | |
| from pathlib import Path | |
| import gradio as gr | |
| from groq import Groq | |
# The Groq API key is supplied through the environment variable "GroqApiKey"
# (configured as a Hugging Face Space secret); the lookup yields None if unset.
api_key = os.environ.get("GroqApiKey")

# One shared Groq client, created at import time and reused by every request.
client = Groq(
    api_key=api_key,
)
def ask_ai(audio_file):
    """Answer a spoken question with text and synthesized speech.

    Runs three steps against the module-level Groq ``client``: transcribe the
    audio, send the transcript to the chat model, then synthesize the model's
    reply into a WAV file.

    Args:
        audio_file: Filesystem path of the recorded or uploaded audio, or a
            falsy value (``None``/``""``) when nothing was submitted.

    Returns:
        A ``(transcript, answer, audio_path)`` tuple. When anything fails the
        tuple is ``("Error processing your request.", <detail>, None)``; when
        the transcript is empty no answer audio is produced and the third
        element is ``None``.
    """
    import tempfile  # local import: only this function needs it

    # Submitting without a recording passes a falsy path; report that clearly
    # instead of surfacing the TypeError that open(None) would raise.
    if not audio_file:
        return "Error processing your request.", "No audio was provided.", None

    try:
        # 1. Speech-to-Text
        source = Path(audio_file)
        # Keep the real filename so an uploaded non-WAV file is not mislabeled
        # as WAV; fall back to the old name only when there is no extension.
        upload_name = source.name if source.suffix else "user_input.wav"
        with open(source, "rb") as file:
            audio_bytes = file.read()
        transcription = client.audio.transcriptions.create(
            file=(upload_name, audio_bytes),
            model="whisper-large-v3",
            response_format="verbose_json",
        )
        user_text = transcription.text

        # Nothing intelligible was said: skip the LLM and TTS calls.
        if not user_text or not user_text.strip():
            return user_text or "", "No speech was detected in the audio.", None

        # 2. LLM Completion
        completion = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{"role": "user", "content": user_text}],
            temperature=1,
            max_completion_tokens=512,
            top_p=1,
        )
        answer_text = completion.choices[0].message.content

        # 3. Text-to-Speech
        response = client.audio.speech.create(
            model="playai-tts",
            voice="Calum-PlayAI",
            response_format="wav",
            input=answer_text,
        )

        # Write each answer to its own temp file. A single fixed "answer.wav"
        # would be overwritten when two requests run at the same time.
        # delete=False because the file must outlive this function so the UI
        # can read it after we return.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            for chunk in response.iter_bytes():
                f.write(chunk)
            speech_file_path = f.name

        return user_text, answer_text, speech_file_path
    except Exception as e:
        # UI boundary: turn any failure into the error tuple the interface
        # displays rather than crashing the request.
        return "Error processing your request.", str(e), None
# --- Gradio UI -------------------------------------------------------------
# Input: one audio widget that accepts a microphone recording or an uploaded
# file and hands the handler a filesystem path.
_question_input = gr.Audio(
    label="Ask me a question (record or upload audio)",
    type="filepath",
    sources=["microphone", "upload"],
)

# Outputs, in the order ask_ai returns them: transcript, answer text, audio.
_answer_outputs = [
    gr.Textbox(label="Transcribed Question"),
    gr.Textbox(label="AI Answer"),
    gr.Audio(label="Answer Audio"),
]

ui = gr.Interface(
    fn=ask_ai,
    inputs=_question_input,
    outputs=_answer_outputs,
    title="🎤 Voice Q&A with Groq AI",
    description="Record or upload an audio file, get an AI-generated spoken answer.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    ui.launch()