Spaces:

Lesterchia1
/

FPOC2_AI-Tutor_Chatbot

Running

App Files Community

Chia Woon Yap committed on Nov 21, 2025

Commit

19a0f6f

verified ·

1 Parent(s): 165d756

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -55

app.py CHANGED Viewed

@@ -299,80 +299,42 @@ def process_document(file):
 # Load model at startup
-#realtime_transcriber = load_realtime_whisper()
-#def transcribe_audio(audio):
-#    """Real-time optimized transcription"""
-#    if audio is None:
-#        return ""
-#    sr, y = audio
-    # Quick preprocessing
-#    if y.ndim > 1:
-#        y = y.mean(axis=1)  # Convert to mono
-#    y = y.astype(np.float32)
-#    max_val = np.max(np.abs(y))
-#    if max_val > 0:
-#        y = y / max_val
-#
-#    try:
-#        # Use real-time transcriber with optimized settings
-#        result = realtime_transcriber(
-#            {"sampling_rate": sr, "raw": y},
-#            generate_kwargs={
-#                "language": "english",
-#                "task": "transcribe",
-#                "temperature": 0.0,  # More deterministic
-#                "no_repeat_ngram_size": 2,  # Reduce repetitions
-#            }
-#        )
-#        return result["text"]
-#    except Exception as e:
-#        print(f"Transcription error: {e}")
-#        return "Could not transcribe audio. Please try again."
-#Common Issue 1: Audio Format Problems
 def transcribe_audio(audio):
-    """Fixed version - handles audio format issues"""
     if audio is None:
         return "Please record audio first"
     try:
         sr, y = audio
-        # FIX: Handle different audio formats from Gradio
-        if isinstance(y, np.ndarray):
-            # Standard numpy array format
-            if y.ndim > 1:
-                y = y.mean(axis=1)  # Stereo to mono
-            y = y.astype(np.float32)
-            # Normalize volume
-            if np.max(np.abs(y)) > 0:
-                y = y / np.max(np.abs(y))
-        else:
-            return "Unsupported audio format"
-        # FIX: Use a more reliable approach
         transcriber = pipeline(
             "automatic-speech-recognition",
             model="openai/whisper-base.en"
         )
-        # FIX: Ensure proper input format
         result = transcriber({"sampling_rate": sr, "raw": y})
         text = result["text"].strip()
-        return text if text else "I heard audio but no clear speech. Try speaking louder."
     except Exception as e:
-        return f"Please try again - {str(e)}"

 # Load model at startup
+# Function to handle speech-to-text conversion
 def transcribe_audio(audio):
+    """Simple working transcription"""
     if audio is None:
         return "Please record audio first"
     try:
         sr, y = audio
+        # Basic preprocessing
+        if y.ndim > 1:
+            y = y.mean(axis=1)  # Convert to mono
+        y = y.astype(np.float32)
+        max_val = np.max(np.abs(y))
+        if max_val > 0:
+            y = y / max_val
+        # Simple pipeline call
         transcriber = pipeline(
             "automatic-speech-recognition",
             model="openai/whisper-base.en"
         )
         result = transcriber({"sampling_rate": sr, "raw": y})
         text = result["text"].strip()
+        return text if text else "No clear speech detected. Try speaking louder."
     except Exception as e:
+        return f"Recording error: {str(e)}"
+# Clear chat history function
+def clear_chat_history():
+    chat_memory.clear()
+    return [], None