Spaces:

gabai-capstone
/

GabAI

Sleeping

App Files Files Community

Arjooohn committed about 1 month ago

Commit

352fca9

verified ·

1 Parent(s): 026149f

Fix bugs

Browse files

Files changed (1) hide show

app.py +11 -17

app.py CHANGED Viewed

@@ -7,13 +7,8 @@ from gtts import gTTS
 import io
 def preprocess(image):
-    """
-    Preprocess the image for OCR:
-    - Flip horizontally to correct mirror
-    - Grayscale, sharpen, denoise, threshold
-    """
     img = np.array(image)
-    img = cv2.flip(img, 1)  # Correct mirror for OCR
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     kernel = np.array([[0,-1,0], [-1,5,-1], [0,-1,0]])
     gray = cv2.filter2D(gray, -1, kernel)
@@ -24,26 +19,26 @@ def preprocess(image):
         cv2.THRESH_BINARY,
         31, 10
     )
-    return thresh, img  # Return processed for OCR and flipped image for preview
 def extract_text_and_speak(image):
-    """
-    Runs OCR and TTS on the captured image
-    """
     processed, flipped_preview = preprocess(image)
     processed_pil = Image.fromarray(processed)
-    preview_pil = Image.fromarray(flipped_preview)  # mirror-corrected preview
     text = pytesseract.image_to_string(processed, lang="eng").strip()
-    if text == "":
-        text = "No readable text found."
-        audio_file = None
-    else:
         tts = gTTS(text)
         buffer = io.BytesIO()
         tts.write_to_fp(buffer)
         buffer.seek(0)
         audio_file = buffer
     return preview_pil, processed_pil, text, audio_file
@@ -53,10 +48,9 @@ with gr.Blocks() as demo:
     with gr.Row():
         webcam = gr.Image(
             type="pil",
-            sources=["webcam"],  # user captures frame
             label="Live Webcam (mirrored)"
         )
-        # Show mirror-corrected preview for the user
         corrected_preview = gr.Image(type="pil", label="Mirror-Corrected Preview")
     processed_preview = gr.Image(type="pil", label="Processed Preview for OCR")

 import io
 def preprocess(image):
     img = np.array(image)
+    img = cv2.flip(img, 1)  # Mirror-correct
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     kernel = np.array([[0,-1,0], [-1,5,-1], [0,-1,0]])
     gray = cv2.filter2D(gray, -1, kernel)
         cv2.THRESH_BINARY,
         31, 10
     )
+    return thresh, img
 def extract_text_and_speak(image):
     processed, flipped_preview = preprocess(image)
     processed_pil = Image.fromarray(processed)
+    preview_pil = Image.fromarray(flipped_preview)
     text = pytesseract.image_to_string(processed, lang="eng").strip()
+    # Create TTS only if text exists
+    if text and text != "No readable text found.":
         tts = gTTS(text)
         buffer = io.BytesIO()
         tts.write_to_fp(buffer)
         buffer.seek(0)
         audio_file = buffer
+    else:
+        audio_file = None
+        if not text:
+            text = "No readable text found."
     return preview_pil, processed_pil, text, audio_file
     with gr.Row():
         webcam = gr.Image(
             type="pil",
+            sources=["webcam"],
             label="Live Webcam (mirrored)"
         )
         corrected_preview = gr.Image(type="pil", label="Mirror-Corrected Preview")
     processed_preview = gr.Image(type="pil", label="Processed Preview for OCR")