Spaces:

Lesterchia1
/

FPOC2_AI-Tutor_Chatbot

Running

Chia Woon Yap committed on Nov 21, 2025

Commit

f2bf26d

verified ·

1 Parent(s): 539cc55

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -275,42 +275,7 @@ def process_document(file):
 #    y /= np.max(np.abs(y))
 #    return transcriber({"sampling_rate": sr, "raw": y})["text"]
-#Quick Fixes You Can Try First:
-#def transcribe_audio(audio):
-#    """Real-time optimized transcription"""
-#    if audio is None:
-#        return ""
-#    sr, y = audio
-#    # Quick preprocessing
-#    if y.ndim > 1:
-#        y = y.mean(axis=1)
-#    y = y.astype(np.float32)
-#    max_val = np.max(np.abs(y))
-#    if max_val > 0:
-#        y = y / max_val
-#    # Use tiny model for real-time speed
-#    realtime_transcriber = pipeline(
-#        "automatic-speech-recognition",
-#        model="openai/whisper-tiny.en",  # Fastest model
-#        device="cuda" if torch.cuda.is_available() else "cpu",
-#        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-#        generate_kwargs={
-#            "language": "english",
-#            "task": "transcribe",
-#            "temperature": 0.0,  # More deterministic
-#            "no_repeat_ngram_size": 2
-#        }
-#    )
-#
-#    return realtime_transcriber({"sampling_rate": sr, "raw": y})["text"]
-#end
 # Real-time Whisper setup - cache the model
 #@gr.cache_resource
 def load_realtime_whisper():
@@ -363,9 +328,45 @@ def transcribe_audio(audio):
         print(f"Transcription error: {e}")
         return "Could not transcribe audio. Please try again."

 #    y /= np.max(np.abs(y))
 #    return transcriber({"sampling_rate": sr, "raw": y})["text"]
+"""
 # Real-time Whisper setup - cache the model
 #@gr.cache_resource
 def load_realtime_whisper():
         print(f"Transcription error: {e}")
         return "Could not transcribe audio. Please try again."
+"""
+#Common Issue 1: Audio Format Problems
_WHISPER_MODEL_ID = "openai/whisper-base.en"

# Holds the lazily-built ASR pipeline so the model is loaded once per process
# rather than once per recording.
_transcriber_cache = {}


def _get_transcriber():
    """Return the shared Whisper ASR pipeline, building it on first use."""
    if "asr" not in _transcriber_cache:
        _transcriber_cache["asr"] = pipeline(
            "automatic-speech-recognition",
            model=_WHISPER_MODEL_ID,
        )
    return _transcriber_cache["asr"]


def transcribe_audio(audio):
    """Transcribe a recorded audio clip to text.

    Args:
        audio: ``None`` when nothing was recorded, otherwise a
            ``(sampling_rate, samples)`` pair where ``samples`` is a NumPy
            array. Arrays with more than one dimension are averaged over
            axis 1 to produce a mono signal.

    Returns:
        The stripped transcription, or a user-facing message when there is
        no audio, the samples are not a NumPy array, no speech was
        recognised, or transcription failed.
    """
    if audio is None:
        return "Please record audio first"
    try:
        sr, y = audio
        if not isinstance(y, np.ndarray):
            return "Unsupported audio format"
        # A zero-length recording would make np.max raise; treat it the same
        # as "nothing recorded" instead of surfacing a NumPy error message.
        if y.size == 0:
            return "Please record audio first"
        if y.ndim > 1:
            y = y.mean(axis=1)  # Stereo to mono
        y = y.astype(np.float32)
        # Peak-normalise; skip silent clips to avoid dividing by zero.
        peak = np.max(np.abs(y))
        if peak > 0:
            y = y / peak
        result = _get_transcriber()({"sampling_rate": sr, "raw": y})
        text = result["text"].strip()
        return text if text else "I heard audio but no clear speech. Try speaking louder."
    except Exception as e:
        # UI boundary: never crash the handler, but keep a trace in the logs.
        print(f"Transcription error: {e}")
        return f"Please try again - {str(e)}"