Arjooohn committed on
Commit
fd054be
·
verified ·
1 Parent(s): 116f556
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -6,25 +6,24 @@ import pytesseract
6
  from gtts import gTTS
7
  import io
8
 
9
def preprocess(frame):
    """Binarize a BGR webcam frame for OCR.

    Pipeline: grayscale -> sharpen -> denoise -> adaptive threshold,
    producing a high-contrast single-channel image for Tesseract.

    Args:
        frame: BGR image as a numpy array (OpenCV convention).

    Returns:
        A uint8 binary (thresholded) numpy array.
    """
    # Drop color first; OCR only needs luminance.
    mono = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # 3x3 sharpening kernel to crisp up character edges.
    sharpen = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    mono = cv2.filter2D(mono, -1, sharpen)
    # Non-local-means denoising (h=30, template window 7, search window 21).
    mono = cv2.fastNlMeansDenoising(mono, None, 30, 7, 21)
    # Adaptive Gaussian threshold copes with uneven lighting across the frame.
    return cv2.adaptiveThreshold(
        mono,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31,
        10,
    )
19
 
20
- def process_video(frame):
21
- if frame is None:
22
- return None, "No frame captured", None
23
-
24
- frame = cv2.flip(frame, 1) # mirror
25
- processed = preprocess(frame)
26
  processed_pil = Image.fromarray(processed)
27
-
28
  text = pytesseract.image_to_string(processed, lang="eng").strip()
29
  if text == "":
30
  text = "No readable text found."
@@ -35,17 +34,16 @@ def process_video(frame):
35
  tts.write_to_fp(buffer)
36
  buffer.seek(0)
37
  audio_file = buffer
38
-
39
  return processed_pil, text, audio_file
40
 
41
  with gr.Blocks() as demo:
42
- gr.Markdown("## GabAI - Real-Time OCR with Webcam")
43
 
44
  with gr.Row():
45
- webcam = gr.Video(
46
- sources=["webcam"], # corrected argument
47
- type="numpy",
48
- label="Webcam Feed"
49
  )
50
  processed_preview = gr.Image(type="pil", label="Processed Preview")
51
 
@@ -53,7 +51,7 @@ with gr.Blocks() as demo:
53
  audio_output = gr.Audio(label="Text-to-Speech Output")
54
 
55
  webcam.change(
56
- fn=process_video,
57
  inputs=webcam,
58
  outputs=[processed_preview, ocr_text, audio_output]
59
  )
 
6
  from gtts import gTTS
7
  import io
8
 
9
def preprocess(image):
    """Prepare a webcam capture for OCR.

    Mirrors the image (so it matches what the user sees), converts to
    grayscale, sharpens, denoises, and adaptively thresholds it so
    Tesseract sees high-contrast text.

    Args:
        image: PIL.Image in RGB order, as delivered by the Gradio
            webcam component (``type="pil"``).

    Returns:
        A single-channel uint8 binary numpy array.
    """
    img = np.array(image)
    img = cv2.flip(img, 1)  # mirror horizontally
    # FIX: PIL arrays are RGB, not BGR — use COLOR_RGB2GRAY so the
    # luminance weights are applied to the correct channels.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # 3x3 sharpening kernel to emphasize glyph edges.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    gray = cv2.filter2D(gray, -1, kernel)
    # Non-local-means denoising (h=30, template window 7, search window 21).
    gray = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
    # Adaptive Gaussian threshold handles uneven lighting across the frame.
    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31, 10,
    )
    return thresh
23
 
24
+ def extract_text_and_speak(image):
25
+ processed = preprocess(image)
 
 
 
 
26
  processed_pil = Image.fromarray(processed)
 
27
  text = pytesseract.image_to_string(processed, lang="eng").strip()
28
  if text == "":
29
  text = "No readable text found."
 
34
  tts.write_to_fp(buffer)
35
  buffer.seek(0)
36
  audio_file = buffer
 
37
  return processed_pil, text, audio_file
38
 
39
  with gr.Blocks() as demo:
40
+ gr.Markdown("## GabAI - AI Assistive Reading System")
41
 
42
  with gr.Row():
43
+ webcam = gr.Image(
44
+ type="pil",
45
+ sources=["webcam"],
46
+ label="Webcam Input"
47
  )
48
  processed_preview = gr.Image(type="pil", label="Processed Preview")
49
 
 
51
  audio_output = gr.Audio(label="Text-to-Speech Output")
52
 
53
  webcam.change(
54
+ fn=extract_text_and_speak,
55
  inputs=webcam,
56
  outputs=[processed_preview, ocr_text, audio_output]
57
  )