Spaces:

Lesterchia1
/

FPOC2_AI-Tutor_Chatbot

Running

App Files Community

Chia Woon Yap committed on Nov 21, 2025

Commit

f97b708

verified ·

1 Parent(s): 36e9420

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -16

app.py CHANGED Viewed

@@ -13,6 +13,10 @@ import time
 import groq
 import uuid  # For generating unique filenames
 # NEW IMPORTS (current):
 from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
@@ -274,6 +278,59 @@ def process_document(file):
 #Quick Fixes You Can Try First:
 def transcribe_audio(audio):
     """Real-time optimized transcription"""
     if audio is None:
@@ -283,28 +340,35 @@ def transcribe_audio(audio):
     # Quick preprocessing
     if y.ndim > 1:
-        y = y.mean(axis=1)
     y = y.astype(np.float32)
     max_val = np.max(np.abs(y))
     if max_val > 0:
         y = y / max_val
-    # Use tiny model for real-time speed
-    realtime_transcriber = pipeline(
-        "automatic-speech-recognition",
-        model="openai/whisper-tiny.en",  # Fastest model
-        device="cuda" if torch.cuda.is_available() else "cpu",
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-        generate_kwargs={
-            "language": "english",
-            "task": "transcribe",
-            "temperature": 0.0,  # More deterministic
-            "no_repeat_ngram_size": 2
-        }
-    )
-    return realtime_transcriber({"sampling_rate": sr, "raw": y})["text"]
 # the remaining is the same

 import groq
 import uuid  # For generating unique filenames
+# Add torch imports at the top
+import torch
+import torchaudio
 # NEW IMPORTS (current):
 from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
 #Quick Fixes You Can Try First:
+#def transcribe_audio(audio):
+#    """Real-time optimized transcription"""
+#    if audio is None:
+#        return ""
+#    sr, y = audio
+#    # Quick preprocessing
+#    if y.ndim > 1:
+#        y = y.mean(axis=1)
+#    y = y.astype(np.float32)
+#    max_val = np.max(np.abs(y))
+#    if max_val > 0:
+#        y = y / max_val
+#    # Use tiny model for real-time speed
+#    realtime_transcriber = pipeline(
+#        "automatic-speech-recognition",
+#        model="openai/whisper-tiny.en",  # Fastest model
+#        device="cuda" if torch.cuda.is_available() else "cpu",
+#        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+#        generate_kwargs={
+#            "language": "english",
+#            "task": "transcribe",
+#            "temperature": 0.0,  # More deterministic
+#            "no_repeat_ngram_size": 2
+#        }
+#    )
+#
+#    return realtime_transcriber({"sampling_rate": sr, "raw": y})["text"]
+#end
+# Real-time Whisper setup - cache the model
+@gr.cache_resource
+def load_realtime_whisper():
+    """Load optimized Whisper model for real-time transcription"""
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    # Use tiny model for real-time speed
+    realtime_transcriber = pipeline(
+        "automatic-speech-recognition",
+        model="openai/whisper-tiny.en",
+        device=device,
+        torch_dtype=torch_dtype,
+    )
+    return realtime_transcriber
+# Load model at startup
+realtime_transcriber = load_realtime_whisper()
 def transcribe_audio(audio):
     """Real-time optimized transcription"""
     if audio is None:
     # Quick preprocessing
     if y.ndim > 1:
+        y = y.mean(axis=1)  # Convert to mono
     y = y.astype(np.float32)
     max_val = np.max(np.abs(y))
     if max_val > 0:
         y = y / max_val
+    try:
+        # Use real-time transcriber with optimized settings
+        result = realtime_transcriber(
+            {"sampling_rate": sr, "raw": y},
+            generate_kwargs={
+                "language": "english",
+                "task": "transcribe",
+                "temperature": 0.0,  # More deterministic
+                "no_repeat_ngram_size": 2,  # Reduce repetitions
+            }
+        )
+        return result["text"]
+    except Exception as e:
+        print(f"Transcription error: {e}")
+        return "Could not transcribe audio. Please try again."
 # the remaining is the same