Spaces:

KittyMona
/

AudioPractice

Sleeping

App Files Files Community

KittyMona committed on Jun 10, 2025

Commit

a84fc4e

verified ·

1 Parent(s): dba938e

Create app.py

Browse files

Files changed (1) hide show

app.py +94 -0

app.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import os
+import whisper
+import scipy.io.wavfile as wav
+from groq import Groq
+from gtts import gTTS
+import gradio as gr
+from pydub import AudioSegment
+# Load Whisper model (Use "small" or "medium" if "base" is too slow)
+model = whisper.load_model("base")
+# Set the Groq API key as an environment variable
+os.environ["GROQ_API_KEY"] = "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"  # Replace with your actual key
+# Get the Groq API key from the environment variable
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+if not GROQ_API_KEY:
+    raise ValueError("❌ ERROR: Groq API key is missing! Set it in your environment.")
+# Initialize the Groq client using the API key variable
+client = Groq(api_key=GROQ_API_KEY)
+# Function to transcribe audio using Whisper
+def transcribe_audio(file_path):
+    try:
+        print(f"📂 Processing File: {file_path}")
+        # Convert audio to WAV (if needed)
+        audio = AudioSegment.from_file(file_path)
+        converted_path = "converted.wav"
+        audio.export(converted_path, format="wav")
+        # Run Whisper Transcription
+        result = model.transcribe(converted_path, fp16=False)  # Use FP32 for CPU
+        return result["text"]
+    except Exception as e:
+        return f"❌ ERROR in Transcription: {str(e)}"
+# Function to interact with Groq LLM
+def chat_with_groq(text):
+    try:
+        chat_completion = client.chat.completions.create(
+            messages=[{"role": "user", "content": text}],
+            model="llama-3.3-70b-versatile"
+        )
+        return chat_completion.choices[0].message.content
+    except Exception as e:
+        return f"❌ ERROR in LLM Interaction: {str(e)}"
+# Function to convert text to speech
+def text_to_speech(text):
+    try:
+        tts = gTTS(text=text, lang="en")
+        filename = "output_audio.mp3"
+        tts.save(filename)
+        return filename
+    except Exception as e:
+        return f"❌ ERROR in TTS: {str(e)}"
+# Main chatbot function (User Uploads Different Files)
+def voice_chatbot(audio_file):
+    if not audio_file:
+        return "❌ Please upload an audio file!", None
+    # Process Speech-to-Text
+    text = transcribe_audio(audio_file)
+    if "ERROR" in text:
+        return text, None  # Return error message
+    # Get AI response
+    response_text = chat_with_groq(text)
+    if "ERROR" in response_text:
+        return response_text, None  # Return error message
+    # Convert response to speech
+    response_audio = text_to_speech(response_text)
+    if "ERROR" in response_audio:
+        return response_audio, None  # Return error message
+    return response_text, response_audio
+# Gradio UI for File Upload (No Default File)
+iface = gr.Interface(
+    fn=voice_chatbot,
+    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
+    outputs=["text", "audio"],
+    title="🎤 Real-Time Voice Chatbot",
+    description="Upload an audio file to transcribe and chat with AI.",
+)
+# Launch Gradio App
+iface.launch()