Spaces:

KittyMona
/

AudioPractice

Sleeping

File size: 2,992 Bytes

a84fc4e

import os
import whisper
import scipy.io.wavfile as wav
from groq import Groq
from gtts import gTTS
import gradio as gr
from pydub import AudioSegment

# Load the Whisper speech-to-text model once, at import time.
# "base" trades accuracy for speed; switch to "tiny" if it is still too slow,
# or to "small"/"medium" for better accuracy at the cost of speed and memory.
model = whisper.load_model("base")

# The Groq API key must be supplied through the GROQ_API_KEY environment
# variable (for example as a secret of the hosting platform). It is
# deliberately NOT written into this file: anything committed to the
# repository is readable by others, and a key that has ever been committed
# must be treated as compromised and revoked.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("❌ ERROR: Groq API key is missing! Set it in your environment.")

# Initialize the Groq client using the API key variable
client = Groq(api_key=GROQ_API_KEY)
# Function to transcribe audio using Whisper
def transcribe_audio(file_path):
    """Transcribe the audio file at ``file_path`` with the loaded Whisper model.

    The input is decoded with pydub and re-encoded as WAV, and that WAV copy
    is what gets passed to ``model.transcribe``. The copy is written to a
    uniquely named temporary file so overlapping requests cannot overwrite
    each other's audio, and it is deleted again before returning.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text, or a string starting with
        "❌ ERROR in Transcription:" if any step raised an exception.
    """
    import tempfile  # local import: only this function needs it

    converted_path = None
    try:
        print(f"📂 Processing File: {file_path}")

        # Convert audio to WAV (if needed)
        audio = AudioSegment.from_file(file_path)
        fd, converted_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)  # pydub reopens the path itself; we only needed the name

        # export() returns the output file handle still open; close it so the
        # data is flushed and the file can be removed afterwards.
        exported = audio.export(converted_path, format="wav")
        if exported is not None:
            exported.close()

        # Run Whisper Transcription
        result = model.transcribe(converted_path, fp16=False)  # Use FP32 for CPU
        return result["text"]

    except Exception as e:
        return f"❌ ERROR in Transcription: {str(e)}"

    finally:
        # Best-effort cleanup of the intermediate WAV, on success and failure.
        if converted_path is not None:
            try:
                os.remove(converted_path)
            except OSError:
                pass

# Send the transcribed text to the Groq-hosted LLM and return its reply
def chat_with_groq(text):
    """Ask the Groq chat model to respond to ``text``.

    ``text`` is sent as a single user message to the
    "llama-3.3-70b-versatile" model, and the message content of the first
    returned choice is handed back.

    Any exception raised along the way is caught and reported as a string
    starting with "❌ ERROR in LLM Interaction:" instead of propagating.
    """
    user_message = {"role": "user", "content": text}
    try:
        completion = client.chat.completions.create(
            messages=[user_message],
            model="llama-3.3-70b-versatile",
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return "❌ ERROR in LLM Interaction: " + str(exc)

# Turn the reply text into spoken audio with gTTS
def text_to_speech(text):
    """Synthesize English speech for ``text`` and save it as an MP3.

    The audio is written to "output_audio.mp3" and that filename is
    returned. If gTTS raises for any reason, a string starting with
    "❌ ERROR in TTS:" is returned instead of a filename.
    """
    output_path = "output_audio.mp3"
    try:
        gTTS(text=text, lang="en").save(output_path)
    except Exception as exc:
        return "❌ ERROR in TTS: " + str(exc)
    return output_path

# Prefix shared by every failure string the pipeline helpers return
_ERROR_PREFIX = "❌ ERROR"


def _is_error(value):
    """Return True when ``value`` is one of the pipeline's failure strings."""
    return isinstance(value, str) and value.startswith(_ERROR_PREFIX)


# Main chatbot function (User Uploads Different Files)
def voice_chatbot(audio_file):
    """Run the full pipeline: speech-to-text, LLM reply, text-to-speech.

    Args:
        audio_file: Path of the uploaded audio file; a falsy value means
            nothing was uploaded.

    Returns:
        A ``(text, audio_path)`` pair. On success ``text`` is the LLM reply
        and ``audio_path`` is the file holding the spoken reply. When no file
        was given, or any stage failed, ``text`` is the error message and
        ``audio_path`` is ``None``.
    """
    if not audio_file:
        return "❌ Please upload an audio file!", None

    # Failures are detected by the exact prefix the helpers emit rather than
    # by searching for the word "ERROR" anywhere, so a transcript or an LLM
    # answer that merely mentions "ERROR" is not mistaken for a failure.

    # Process Speech-to-Text
    text = transcribe_audio(audio_file)
    if _is_error(text):
        return text, None  # Return error message

    # Get AI response
    response_text = chat_with_groq(text)
    if _is_error(response_text):
        return response_text, None  # Return error message

    # Convert response to speech
    response_audio = text_to_speech(response_text)
    if _is_error(response_audio):
        return response_audio, None  # Return error message

    return response_text, response_audio

# Build the Gradio front end: one audio upload in, reply text and reply audio out.
# The upload is delivered to voice_chatbot as a file path; no default file is set.
_audio_input = gr.Audio(type="filepath", label="Upload an Audio File")

iface = gr.Interface(
    fn=voice_chatbot,
    inputs=_audio_input,
    outputs=["text", "audio"],
    title="🎤 Real-Time Voice Chatbot",
    description="Upload an audio file to transcribe and chat with AI.",
)

# Start serving the app
iface.launch()