# GabAI / app.py
# (Hugging Face Space — commit 2c32f32 "Fix bugs"; page header residue converted to comments)
import gradio as gr
import cv2
import numpy as np
from PIL import Image
import pytesseract
from gtts import gTTS
import tempfile
import os
def preprocess(image):
    """Prepare a webcam frame for OCR.

    Mirror-corrects the frame, converts it to grayscale, sharpens,
    denoises, and adaptively thresholds it so Tesseract sees crisp
    black-on-white text.

    Parameters
    ----------
    image : PIL.Image.Image
        Incoming webcam frame. PIL images are RGB-ordered (or RGBA/L).

    Returns
    -------
    tuple[numpy.ndarray, numpy.ndarray]
        ``(thresh, img)`` — the binary thresholded image for OCR and the
        mirror-corrected color frame for preview.
    """
    img = np.array(image)
    img = cv2.flip(img, 1)  # undo the webcam's horizontal mirroring

    if img.ndim == 2:
        # Already single-channel (mode "L") — nothing to convert.
        gray = img.copy()
    else:
        if img.shape[2] == 4:
            # Drop the alpha channel so the grayscale conversion is valid.
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
        # BUG FIX: arrays from PIL are RGB, not BGR — COLOR_BGR2GRAY swapped
        # the red/blue luminance weights. Use the RGB variant.
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # 3x3 sharpening kernel to crisp up character stroke edges.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    gray = cv2.filter2D(gray, -1, kernel)

    # Non-local-means denoising: strength 30, 7px template, 21px search window.
    gray = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)

    # Adaptive Gaussian threshold copes with uneven webcam lighting
    # (31px neighborhood, constant 10 subtracted from the local mean).
    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31, 10,
    )
    return thresh, img
def extract_text_and_speak(image):
    """Run OCR on a webcam frame and synthesize speech for the result.

    Parameters
    ----------
    image : PIL.Image.Image
        Raw (mirrored) webcam frame.

    Returns
    -------
    tuple
        ``(preview_pil, processed_pil, text, audio_file)`` — the
        mirror-corrected preview, the binary image fed to OCR, the
        extracted text (or a fallback message), and the path to an MP3
        of the spoken text, or ``None`` when nothing readable was found.
    """
    processed, corrected = preprocess(image)
    processed_pil = Image.fromarray(processed)
    preview_pil = Image.fromarray(corrected)

    text = pytesseract.image_to_string(processed, lang="eng").strip()

    audio_file = None
    if text:
        # Reserve a temp path and close the handle BEFORE gTTS writes to it:
        # saving into a still-open NamedTemporaryFile fails on Windows.
        # (The dead `text != "No readable text found."` comparison from the
        # original is gone — OCR output can't equal the fallback sentinel
        # before the sentinel is assigned.)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
            audio_file = tmpfile.name
        gTTS(text).save(audio_file)
    else:
        # No OCR hits: show a friendly message and skip TTS entirely.
        text = "No readable text found."

    return preview_pil, processed_pil, text, audio_file
# ---- Gradio UI wiring ----------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## GabAI - AI Assistive Reading System")

    # Live feed and its mirror-corrected counterpart sit side by side.
    with gr.Row():
        cam_input = gr.Image(
            type="pil",
            sources=["webcam"],
            label="Live Webcam (mirrored)",
        )
        mirror_view = gr.Image(type="pil", label="Mirror-Corrected Preview")

    ocr_view = gr.Image(type="pil", label="Processed Preview for OCR")
    text_box = gr.Textbox(label="Extracted Text")
    speech_out = gr.Audio(label="Text-to-Speech Output")

    # Every new frame triggers OCR + TTS and refreshes all four outputs.
    cam_input.change(
        fn=extract_text_and_speak,
        inputs=cam_input,
        outputs=[mirror_view, ocr_view, text_box, speech_out],
    )

if __name__ == "__main__":
    demo.launch()