Spaces:
Sleeping
Sleeping
| import whisper | |
| from groq import Groq | |
| from gtts import gTTS | |
| import os | |
| import gradio as gr | |
| import tempfile | |
| import logging | |
# Setup logging
logging.basicConfig(level=logging.DEBUG)

# Initialize the Whisper model
whisper_model = whisper.load_model("base")

# Initialize Groq client.
# SECURITY: the API key is read from the GROQ_API_KEY environment variable
# instead of being embedded in the source. A key that has been committed to a
# file is readable by anyone with access to that file and should be revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def process_audio(input_audio):
    """Run one voice-to-voice turn: transcribe, query the LLM, synthesize speech.

    Args:
        input_audio: Path to the input audio file. May be ``None`` or empty
            when nothing was submitted; that is reported as an error.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the LLM reply
        and ``audio_path`` is the path of a temporary ``.mp3`` file holding
        the spoken reply. On any failure ``text`` is
        ``"An error occurred: <reason>"`` and ``audio_path`` is ``None``;
        exceptions are logged and never propagate to the caller.
    """
    try:
        # Log received audio file
        logging.debug("Received audio file: %s", input_audio)

        # Fail early with a clear message instead of an obscure error from
        # the transcription backend when no file was provided.
        if not input_audio:
            raise ValueError("No audio input provided.")

        # Transcribe audio with Whisper
        transcription = whisper_model.transcribe(input_audio)
        user_text = transcription.get("text") or ""
        # Whitespace-only output carries no usable prompt for the LLM.
        if not user_text.strip():
            raise ValueError("Whisper failed to transcribe the audio.")
        logging.debug("Transcription: %s", user_text)

        # Interact with LLM via Groq API
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": user_text}],
            model="llama3-8b-8192",
        )
        if not response.choices:
            raise ValueError("Groq API returned an empty response.")
        llm_response = response.choices[0].message.content
        logging.debug("LLM Response: %s", llm_response)

        # The content may be None as well as blank; check both before
        # calling .strip() so the user sees the intended error message.
        if not llm_response or not llm_response.strip():
            raise ValueError("LLM response is empty or invalid.")

        # Convert LLM response to speech with gTTS
        tts = gTTS(llm_response)

        # Reserve a unique path and close the handle right away: the file is
        # written by path below, so keeping the handle open would only leak a
        # descriptor (and keep the file open while it is being written).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_output:
            output_path = temp_output.name
        try:
            tts.save(output_path)
        except Exception:
            # Do not leave an orphaned temp file behind when synthesis fails.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise
        logging.debug("Generated audio file: %s", output_path)

        # Return response text and audio file path
        return llm_response, output_path
    except Exception as e:
        # Top-level boundary for the UI callback: log with traceback and turn
        # the failure into a user-visible message.
        logging.exception("Error in process_audio: %s", e)
        return f"An error occurred: {str(e)}", None
# Build the Gradio UI components: one audio input, then text + audio outputs.
_audio_input = gr.Audio(type="filepath")  # 'filepath' passes a path to fn
_response_outputs = [
    gr.Textbox(label="LLM Response"),
    gr.Audio(label="Response Audio"),
]

# Wire the components to process_audio.
interface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=_response_outputs,
    title="Real-Time Voice-to-Voice Chatbox",
    description="Transcribes input audio, interacts with an LLM via Groq API, and generates audio responses.",
)

# Launch the Gradio app (share=True requests a public share link).
interface.launch(share=True)