Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,10 @@ import os
|
|
| 8 |
|
| 9 |
def preprocess(image):
|
| 10 |
"""
|
| 11 |
-
Preprocess the image for OCR
|
|
|
|
|
|
|
|
|
|
| 12 |
"""
|
| 13 |
img = np.array(image)
|
| 14 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
@@ -20,33 +23,34 @@ def preprocess(image):
|
|
| 20 |
return thresh
|
| 21 |
|
| 22 |
def extract_and_speak(image):
|
|
|
|
|
|
|
|
|
|
| 23 |
processed = preprocess(image)
|
| 24 |
text = pytesseract.image_to_string(processed, lang="eng")
|
|
|
|
| 25 |
if text.strip() == "":
|
| 26 |
return "No readable text found.", None
|
|
|
|
| 27 |
tts = gTTS(text)
|
| 28 |
tts.save("output.mp3")
|
|
|
|
| 29 |
return text, "output.mp3"
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
)
|
| 37 |
-
|
| 38 |
-
# Webcam input (sources instead of source)
|
| 39 |
-
webcam = gr.Image(type="pil", sources=["webcam"])
|
| 40 |
-
|
| 41 |
-
# Mirror the preview using CSS (client-side only)
|
| 42 |
-
webcam.style(**{"transform": "scaleX(-1)"})
|
| 43 |
-
|
| 44 |
-
# Outputs
|
| 45 |
-
text_output = gr.Textbox(label="Extracted Text")
|
| 46 |
-
audio_output = gr.Audio(label="Text-to-Speech Output")
|
| 47 |
-
|
| 48 |
-
# Button to process
|
| 49 |
-
submit = gr.Button("Read Text")
|
| 50 |
-
submit.click(fn=extract_and_speak, inputs=webcam, outputs=[text_output, audio_output])
|
| 51 |
|
| 52 |
-
|
|
|
|
|
|
| 8 |
|
| 9 |
def preprocess(image):
|
| 10 |
"""
|
| 11 |
+
Preprocess the image for OCR:
|
| 12 |
+
- Convert to grayscale
|
| 13 |
+
- Apply Gaussian blur
|
| 14 |
+
- Apply Otsu threshold
|
| 15 |
"""
|
| 16 |
img = np.array(image)
|
| 17 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
|
|
| 23 |
return thresh
|
| 24 |
|
| 25 |
def extract_and_speak(image):
    """Extract text from an image via OCR and convert it to speech.

    Args:
        image: PIL image captured from the Gradio webcam input.

    Returns:
        A ``(text, audio_path)`` tuple: the OCR'd text and the path to an
        MP3 file of the spoken text, or ``("No readable text found.", None)``
        when OCR produces no usable text.
    """
    processed = preprocess(image)
    text = pytesseract.image_to_string(processed, lang="eng")

    if not text.strip():
        return "No readable text found.", None

    # Write the audio to a unique temp file rather than a fixed
    # "output.mp3": concurrent Gradio requests would otherwise overwrite
    # each other's output and could serve stale audio.
    import tempfile
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # gTTS reopens the path itself; release our handle first

    tts = gTTS(text)
    tts.save(audio_path)

    return text, audio_path
|
| 39 |
|
| 40 |
+
# Gradio UI: webcam capture in -> (extracted text, spoken audio) out.
interface = gr.Interface(
    fn=extract_and_speak,
    inputs=gr.Image(type="pil", sources=["webcam"]),  # Webcam only
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Audio(label="Text-to-Speech Output"),
    ],
    title="GabAI - AI Assistive Reading System",
    description=(
        "Use your webcam to capture printed text. "
        "The system extracts the text and converts it into speech."
    ),
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when executed as a script,
    # not when this module is imported.
    interface.launch()