# Source: Hugging Face Space (status: sleeping) - file size 2,992 bytes, revision a84fc4e.
import os
import whisper
import scipy.io.wavfile as wav
from groq import Groq
from gtts import gTTS
import gradio as gr
from pydub import AudioSegment
# Load the Whisper speech-to-text model once at import time so every request
# reuses it. "base" is a speed/accuracy compromise: switch to "tiny" if it is
# too slow, or to "small"/"medium" for better accuracy at higher latency.
model = whisper.load_model("base")

# Read the Groq API key from the environment. The key must NOT be written into
# this file: configure GROQ_API_KEY as a secret/environment variable of the
# deployment instead.
# NOTE: a real key was previously hard-coded at this spot; treat it as leaked
# and revoke/rotate it in the Groq console.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast at startup rather than on the first chat request.
    raise ValueError("β ERROR: Groq API key is missing! Set it in your environment.")

# Shared Groq client used by chat_with_groq().
client = Groq(api_key=GROQ_API_KEY)
# Function to transcribe audio using Whisper
def transcribe_audio(file_path):
    """Transcribe the audio file at *file_path* and return the recognised text.

    The input is first re-encoded to WAV with pydub, then passed to the
    module-level Whisper ``model``.

    Args:
        file_path: Path of the uploaded audio file (any format pydub can read).

    Returns:
        The transcription text on success. On any failure, a string of the
        form ``"β ERROR in Transcription: <details>"`` is returned instead of
        raising, which is what the caller checks for.
    """
    import tempfile

    converted_path = None
    try:
        print(f"π Processing File: {file_path}")

        # Convert audio to WAV (if needed). A unique temporary file is used so
        # that concurrent requests cannot overwrite each other's audio.
        audio = AudioSegment.from_file(file_path)
        fd, converted_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)  # pydub reopens the file by path
        audio.export(converted_path, format="wav")

        # Run Whisper transcription; fp16=False forces FP32 for CPU inference.
        result = model.transcribe(converted_path, fp16=False)
        return result["text"]
    except Exception as e:
        return f"β ERROR in Transcription: {str(e)}"
    finally:
        # Always remove the scratch WAV, whether transcription succeeded or not.
        if converted_path is not None:
            try:
                os.remove(converted_path)
            except OSError:
                pass
# Function to interact with Groq LLM
def chat_with_groq(text):
    """Send *text* as a single user message to the Groq LLM and return the reply.

    Args:
        text: The user's prompt.

    Returns:
        The content of the first completion choice, or a string of the form
        ``"β ERROR in LLM Interaction: <details>"`` if the request fails.
    """
    user_turn = {"role": "user", "content": text}
    try:
        completion = client.chat.completions.create(
            messages=[user_turn],
            model="llama-3.3-70b-versatile",
        )
        first_choice = completion.choices[0]
        answer = first_choice.message.content
    except Exception as err:
        return f"β ERROR in LLM Interaction: {str(err)}"
    return answer
# Function to convert text to speech
def text_to_speech(text):
    """Synthesise *text* to English speech and return the path of the MP3 file.

    Each call writes to its own temporary file, so simultaneous requests do
    not overwrite one another's audio.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated ``.mp3`` file on success, or a string of the form
        ``"β ERROR in TTS: <details>"`` if synthesis fails.
    """
    import tempfile

    out_path = None
    try:
        tts = gTTS(text=text, lang="en")
        # Reserve a unique output path only once the gTTS object was created.
        fd, out_path = tempfile.mkstemp(prefix="output_audio_", suffix=".mp3")
        os.close(fd)  # gTTS reopens the file by path
        tts.save(out_path)
        return out_path
    except Exception as e:
        # Do not leave an empty/partial file behind when synthesis fails.
        if out_path is not None:
            try:
                os.remove(out_path)
            except OSError:
                pass
        return f"β ERROR in TTS: {str(e)}"
# Exact prefixes of the error strings returned by transcribe_audio(),
# chat_with_groq() and text_to_speech(). Matching on these, rather than on the
# bare word "ERROR", keeps ordinary transcripts or LLM replies that merely
# mention "ERROR" from being treated as failures.
_PIPELINE_ERROR_PREFIXES = (
    "β ERROR in Transcription:",
    "β ERROR in LLM Interaction:",
    "β ERROR in TTS:",
)


def _is_pipeline_error(value):
    """Return True if *value* is an error string produced by a pipeline step."""
    return isinstance(value, str) and value.startswith(_PIPELINE_ERROR_PREFIXES)


# Main chatbot function (User Uploads Different Files)
def voice_chatbot(audio_file):
    """Run the speech -> LLM -> speech pipeline for one uploaded audio file.

    Args:
        audio_file: Path of the uploaded audio file; falsy when nothing was
            uploaded.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the LLM reply and
        ``audio_path`` is the synthesised speech file. If no file was uploaded
        or any step fails, ``text`` is the error message and ``audio_path`` is
        ``None``.
    """
    if not audio_file:
        return "β Please upload an audio file!", None

    # Process Speech-to-Text
    text = transcribe_audio(audio_file)
    if _is_pipeline_error(text):
        return text, None  # Return error message

    # Get AI response
    response_text = chat_with_groq(text)
    if _is_pipeline_error(response_text):
        return response_text, None  # Return error message

    # Convert response to speech
    response_audio = text_to_speech(response_text)
    if _is_pipeline_error(response_audio):
        return response_audio, None  # Return error message

    return response_text, response_audio
# Gradio UI for File Upload (No Default File)
# The audio input hands voice_chatbot() a file path; the two outputs receive
# the reply text and the path of the synthesised reply audio.
iface = gr.Interface(
    fn=voice_chatbot,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=["text", "audio"],
    title="π€ Real-Time Voice Chatbot",
    description="Upload an audio file to transcribe and chat with AI.",
)

# Launch Gradio App
iface.launch()