Spaces:
Sleeping
Sleeping
| import os | |
| import whisper | |
| import scipy.io.wavfile as wav | |
| from groq import Groq | |
| from gtts import gTTS | |
| import gradio as gr | |
| from pydub import AudioSegment | |
# Load the Whisper speech-to-text model once, at import time.
# "base" is a small, fast model. Use "tiny" if "base" is too slow, or
# "small" / "medium" for better accuracy at the cost of speed and memory.
model = whisper.load_model("base")

# Read the Groq API key from the environment. The key must be supplied from
# outside the source (for example as a Space secret or a shell export).
# SECURITY: never hard-code the key here -- anything committed to this file is
# published with it. Any key that was previously committed in this file must
# be treated as compromised and revoked.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("β ERROR: Groq API key is missing! Set it in your environment.")

# Initialize the Groq client using the API key variable
client = Groq(api_key=GROQ_API_KEY)
# Function to transcribe audio using Whisper
def transcribe_audio(file_path):
    """Transcribe an audio file to text with the module-level Whisper model.

    The input is loaded with ``AudioSegment.from_file``, re-encoded to WAV
    inside a private temporary directory, and passed to ``model.transcribe``.
    The temporary directory (and the WAV inside it) is removed before
    returning, so repeated calls neither leave files behind nor share a
    single fixed output path.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text on success. On any failure the exception is
        not raised; a string of the form
        ``"<symbol> ERROR in Transcription: <details>"`` is returned instead.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    import tempfile

    try:
        print(f"π Processing File: {file_path}")

        # Convert audio to WAV (if needed)
        audio = AudioSegment.from_file(file_path)
        with tempfile.TemporaryDirectory() as work_dir:
            converted_path = os.path.join(work_dir, "converted.wav")
            # export() returns the open output file object; close it so the
            # handle is not leaked and the directory can be removed cleanly.
            audio.export(converted_path, format="wav").close()

            # Run Whisper Transcription
            result = model.transcribe(converted_path, fp16=False)  # Use FP32 for CPU
        return result["text"]
    except Exception as e:
        # Boundary of the pipeline stage: report the failure as text so the
        # caller can show it in the UI.
        return f"β ERROR in Transcription: {str(e)}"
# Function to interact with Groq LLM
def chat_with_groq(text):
    """Send *text* as a single user message to the Groq chat API.

    Args:
        text: The user's message content.

    Returns:
        The content of the first choice in the completion. If the request
        (or reading the response) raises, the exception is swallowed and a
        string of the form ``"<symbol> ERROR in LLM Interaction: <details>"``
        is returned instead.
    """
    user_turn = {"role": "user", "content": text}
    try:
        completion = client.chat.completions.create(
            messages=[user_turn],
            model="llama-3.3-70b-versatile",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"β ERROR in LLM Interaction: {err!s}"
# Function to convert text to speech
def text_to_speech(text):
    """Synthesize English speech for *text* with gTTS and save it as an MP3.

    Args:
        text: The text to speak.

    Returns:
        The output filename ``"output_audio.mp3"`` (relative to the current
        working directory) on success. If synthesis or saving raises, a
        string of the form ``"<symbol> ERROR in TTS: <details>"`` is
        returned instead.
    """
    output_path = "output_audio.mp3"
    try:
        gTTS(text=text, lang="en").save(output_path)
    except Exception as err:
        return f"β ERROR in TTS: {err!s}"
    return output_path
def _is_stage_failure(result, stage):
    """Return True if *result* is the failure message of pipeline stage *stage*.

    The stage functions in this file report failure by returning a string
    shaped like ``"<symbol> ERROR in <stage>: <details>"``. Only that exact
    marker, directly after the leading symbol, counts as a failure, so
    ordinary text that merely contains the word "ERROR" is not mistaken
    for one.
    """
    if not isinstance(result, str):
        return False
    _symbol, _sep, remainder = result.partition(" ")
    return remainder.startswith(f"ERROR in {stage}:")


# Main chatbot function (User Uploads Different Files)
def voice_chatbot(audio_file):
    """Run the full pipeline: speech-to-text, LLM reply, text-to-speech.

    Args:
        audio_file: Path of the uploaded audio file; a falsy value means
            nothing was uploaded.

    Returns:
        A ``(text, audio)`` tuple. On success ``text`` is the LLM reply and
        ``audio`` is the path of the synthesized speech file. If no file was
        given or any stage fails, ``text`` is the corresponding message and
        ``audio`` is ``None``.
    """
    if not audio_file:
        return "β Please upload an audio file!", None

    # Process Speech-to-Text
    text = transcribe_audio(audio_file)
    if _is_stage_failure(text, "Transcription"):
        return text, None  # Return error message

    # Get AI response
    response_text = chat_with_groq(text)
    if _is_stage_failure(response_text, "LLM Interaction"):
        return response_text, None  # Return error message

    # Convert response to speech
    response_audio = text_to_speech(response_text)
    if _is_stage_failure(response_audio, "TTS"):
        return response_audio, None  # Return error message

    return response_text, response_audio
# Gradio UI for File Upload (No Default File)
# One audio input (delivered to voice_chatbot as a file path) and two outputs:
# the reply text and the synthesized reply audio.
iface = gr.Interface(
    title="π€ Real-Time Voice Chatbot",
    description="Upload an audio file to transcribe and chat with AI.",
    fn=voice_chatbot,
    inputs=gr.Audio(label="Upload an Audio File", type="filepath"),
    outputs=["text", "audio"],
)

# Launch Gradio App
iface.launch()