Spaces:

Lesterchia1
/

FPOC2_AI-Tutor_Chatbot

Running

App Files Community

Chia Woon Yap committed on Nov 21, 2025

Commit

7242b45

verified ·

1 Parent(s): fb80bae

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -89

app.py CHANGED Viewed

@@ -7,12 +7,12 @@ Original file is located at
 import gradio as gr
 import numpy as np
-from transformers import pipeline
 import os
 import time
 import groq
 import uuid
 import re
 # LangChain imports
 from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
@@ -29,8 +29,6 @@ import docx  # python-docx for Word files
 import gtts  # Google Text-to-Speech library
 from pptx import Presentation  # python-pptx for PowerPoint files
-import torch
 # Set API Key
 groq.api_key = os.getenv("GROQ_API_KEY")
@@ -85,41 +83,14 @@ Answer: d) 0.4
 Feedback: This question tests understanding of Bayes' Theorem by requiring the calculation of conditional probability using the given values.
 """
-# Fixed Whisper Implementation
-class FixedWhisperTranscriber:
     def __init__(self):
-        self.device = 0 if torch.cuda.is_available() else "cpu"
-        print(f"Using device: {self.device}")
-        # Try multiple models in order
-        self.model = self._load_model()
-    def _load_model(self):
-        """Try loading different models until one works"""
-        models_to_try = [
-            "openai/whisper-base",
-            "openai/whisper-tiny",
-            "openai/whisper-small",
-        ]
-        for model_name in models_to_try:
-            try:
-                print(f"Trying to load: {model_name}")
-                pipe = pipeline(
-                    "automatic-speech-recognition",
-                    model=model_name,
-                    device=self.device,
-                )
-                print(f"✅ Successfully loaded: {model_name}")
-                return pipe
-            except Exception as e:
-                print(f"❌ Failed to load {model_name}: {e}")
-                continue
-        raise Exception("All models failed to load")
     def transcribe_audio(self, audio):
-        """Robust transcription with proper error handling"""
         if audio is None:
             return "Please record audio first"
@@ -136,74 +107,74 @@ class FixedWhisperTranscriber:
             if y.ndim > 1:
                 y = np.mean(y, axis=1)
-            # Convert to float32 and normalize
             y = y.astype(np.float32)
-            if np.max(np.abs(y)) > 0:
-                y = y / np.max(np.abs(y))
-            # Check audio quality
             audio_duration = len(y) / sr
             print(f"Audio duration: {audio_duration:.2f} seconds")
             if audio_duration < 0.5:
                 return "Audio too short. Speak for at least 1 second."
-            if audio_duration > 30:
-                return "Audio too long. Keep it under 30 seconds."
-            # Prepare audio for Whisper
-            audio_dict = {"array": y, "sampling_rate": sr}
-            print("Starting transcription...")
-            # Simple transcription call
-            result = self.model(audio_dict)
-            transcription = result["text"].strip()
-            print(f"Raw transcription: '{transcription}'")
-            # Filter out garbage outputs
-            if self._is_garbage_transcription(transcription):
-                return "No clear speech detected. Please try again with clearer audio."
-            return transcription
         except Exception as e:
-            print(f"Transcription error: {str(e)}")
             return f"Transcription failed: {str(e)}"
-    def _is_garbage_transcription(self, text):
-        """Check if transcription is garbage"""
-        if not text:
-            return True
-        # Common garbage patterns
-        garbage_patterns = [
-            r"^(oh,\s*)+oh$",
-            r"^(ah,\s*)+ah$",
-            r"^(\w+,\s*)+\w+$",  # Repeated single words
-        ]
-        text_lower = text.lower().strip()
-        for pattern in garbage_patterns:
-            if re.match(pattern, text_lower):
-                return True
-        # Check if it's just repetitive nonsense
-        words = text_lower.split()
-        if len(words) > 10:
-            unique_words = len(set(words))
-            if unique_words / len(words) < 0.3:  # Too repetitive
-                return True
-        return False
 # Initialize transcriber
 try:
-    transcriber = FixedWhisperTranscriber()
 except Exception as e:
-    print(f"Failed to initialize transcriber: {e}")
     transcriber = None
 def transcribe_audio(audio):
@@ -227,7 +198,7 @@ def get_transcription_status(audio):
         elif duration > 10:
             return "Processing longer audio..."
         else:
-            return "Processing audio..."
     except:
         return "Ready to record"
@@ -445,11 +416,11 @@ def tutor_ai_chatbot():
                 - 🗣️ Speak at normal volume and pace
                 - 📱 Use a good quality microphone
-                **If you see 'oh oh oh' errors:**
-                - Your audio might be too noisy
-                - Try recording in a quieter place
-                - Speak more clearly and slowly
-                - Use headphones with microphone
                 """)
             # Clear chat history button
@@ -499,7 +470,7 @@ def tutor_ai_chatbot():
                 inputs=audio_input,
                 outputs=msg
             ).then(
-                fn=lambda x: "Transcription completed!" if x and "failed" not in x.lower() and "error" not in x.lower() and "sorry" not in x.lower() else "Ready for new recording",
                 inputs=msg,
                 outputs=transcription_status
             )

 import gradio as gr
 import numpy as np
 import os
 import time
 import groq
 import uuid
 import re
+import tempfile
 # LangChain imports
 from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
 import gtts  # Google Text-to-Speech library
 from pptx import Presentation  # python-pptx for PowerPoint files
 # Set API Key
 groq.api_key = os.getenv("GROQ_API_KEY")
 Feedback: This question tests understanding of Bayes' Theorem by requiring the calculation of conditional probability using the given values.
 """
+# Groq Whisper Transcriber - RELIABLE SOLUTION
+class GroqWhisperTranscriber:
     def __init__(self):
+        self.client = groq.Client(api_key=groq.api_key)
+        print("✅ Groq Whisper transcriber initialized")
     def transcribe_audio(self, audio):
+        """Transcribe audio using Groq's reliable Whisper API"""
         if audio is None:
             return "Please record audio first"
             if y.ndim > 1:
                 y = np.mean(y, axis=1)
+            # Convert to proper format
             y = y.astype(np.float32)
+            # Normalize audio
+            max_val = np.max(np.abs(y))
+            if max_val > 0:
+                y = y / max_val
+            # Check audio duration
             audio_duration = len(y) / sr
             print(f"Audio duration: {audio_duration:.2f} seconds")
             if audio_duration < 0.5:
                 return "Audio too short. Speak for at least 1 second."
+            if audio_duration > 60:
+                return "Audio too long. Keep it under 60 seconds."
+            # Convert to 16-bit PCM for WAV file
+            y_int16 = (y * 32767).astype(np.int16)
+            # Create temporary WAV file
+            import scipy.io.wavfile
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+                temp_path = f.name
+            # Save as WAV file
+            scipy.io.wavfile.write(temp_path, sr, y_int16)
+            print("Sending to Groq Whisper API...")
+            # Transcribe with Groq API - USE TURBO VERSION
+            with open(temp_path, "rb") as audio_file:
+                transcription = self.client.audio.transcriptions.create(
+                    file=(temp_path, audio_file.read(), "audio/wav"),
+                    model="whisper-large-v3-turbo",  # Use the best model
+                    response_format="text",
+                    language="en"  # Optional: specify English for better accuracy
+                )
+            # Clean up temporary file
+            os.unlink(temp_path)
+            text = transcription.strip()
+            print(f"Groq transcription: '{text}'")
+            if not text:
+                return "No speech detected. Please try again."
+            return text
         except Exception as e:
+            print(f"Groq transcription error: {str(e)}")
+            # Clean up temp file if it exists
+            try:
+                if 'temp_path' in locals():
+                    os.unlink(temp_path)
+            except:
+                pass
             return f"Transcription failed: {str(e)}"
 # Initialize transcriber
 try:
+    transcriber = GroqWhisperTranscriber()
+    print("✅ Transcriber initialized successfully with Groq API")
 except Exception as e:
+    print(f"❌ Failed to initialize transcriber: {e}")
     transcriber = None
 def transcribe_audio(audio):
         elif duration > 10:
             return "Processing longer audio..."
         else:
+            return "Processing audio with Groq API..."
     except:
         return "Ready to record"
                 - 🗣️ Speak at normal volume and pace
                 - 📱 Use a good quality microphone
+                **Using Groq Whisper API:**
+                - ✅ High accuracy transcription
+                - ✅ No more "B-B-B" or "oh-oh-oh" errors
+                - ✅ Fast and reliable
+                - ✅ Professional grade speech recognition
                 """)
             # Clear chat history button
                 inputs=audio_input,
                 outputs=msg
             ).then(
+                fn=lambda x: "✅ Transcription completed!" if x and "failed" not in x.lower() and "error" not in x.lower() and "sorry" not in x.lower() else "Ready for new recording",
                 inputs=msg,
                 outputs=transcription_status
             )