# GabAI / app.py
# (Hugging Face Space — commit 2c32f32 "Fix bugs"; page header residue converted to comments)
import gradio as gr
import cv2
import numpy as np
from PIL import Image
import pytesseract
from gtts import gTTS
import tempfile
import os
def preprocess(image):
    """Prepare a webcam frame for OCR.

    Mirror-corrects the frame, converts it to grayscale, sharpens,
    denoises, and adaptively thresholds it so Tesseract sees crisp
    black-on-white text.

    Parameters
    ----------
    image : PIL.Image.Image
        Incoming webcam frame. PIL images are RGB-ordered (or RGBA/L).

    Returns
    -------
    tuple[numpy.ndarray, numpy.ndarray]
        ``(thresh, img)`` — the binary thresholded image for OCR and the
        mirror-corrected color frame for preview.
    """
    img = np.array(image)
    img = cv2.flip(img, 1)  # undo the webcam's horizontal mirroring

    if img.ndim == 2:
        # Already single-channel (mode "L") — nothing to convert.
        gray = img.copy()
    else:
        if img.shape[2] == 4:
            # Drop the alpha channel so the grayscale conversion is valid.
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
        # BUG FIX: arrays from PIL are RGB, not BGR — COLOR_BGR2GRAY swapped
        # the red/blue luminance weights. Use the RGB variant.
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # 3x3 sharpening kernel to crisp up character stroke edges.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    gray = cv2.filter2D(gray, -1, kernel)

    # Non-local-means denoising: strength 30, 7px template, 21px search window.
    gray = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)

    # Adaptive Gaussian threshold copes with uneven webcam lighting
    # (31px neighborhood, constant 10 subtracted from the local mean).
    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31, 10,
    )
    return thresh, img
def extract_text_and_speak(image):
    """Run OCR on a webcam frame and synthesize speech for the result.

    Parameters
    ----------
    image : PIL.Image.Image
        Raw (mirrored) webcam frame.

    Returns
    -------
    tuple
        ``(preview_pil, processed_pil, text, audio_file)`` — the
        mirror-corrected preview, the binary image fed to OCR, the
        extracted text (or a fallback message), and the path to an MP3
        of the spoken text, or ``None`` when nothing readable was found.
    """
    processed, corrected = preprocess(image)
    processed_pil = Image.fromarray(processed)
    preview_pil = Image.fromarray(corrected)

    text = pytesseract.image_to_string(processed, lang="eng").strip()

    audio_file = None
    if text:
        # Reserve a temp path and close the handle BEFORE gTTS writes to it:
        # saving into a still-open NamedTemporaryFile fails on Windows.
        # (The dead `text != "No readable text found."` comparison from the
        # original is gone — OCR output can't equal the fallback sentinel
        # before the sentinel is assigned.)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
            audio_file = tmpfile.name
        gTTS(text).save(audio_file)
    else:
        # No OCR hits: show a friendly message and skip TTS entirely.
        text = "No readable text found."

    return preview_pil, processed_pil, text, audio_file
# ---- Gradio UI wiring ----------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("## GabAI - AI Assistive Reading System")

    # Live feed and its mirror-corrected counterpart sit side by side.
    with gr.Row():
        cam_input = gr.Image(
            type="pil",
            sources=["webcam"],
            label="Live Webcam (mirrored)",
        )
        mirror_view = gr.Image(type="pil", label="Mirror-Corrected Preview")

    ocr_view = gr.Image(type="pil", label="Processed Preview for OCR")
    text_box = gr.Textbox(label="Extracted Text")
    speech_out = gr.Audio(label="Text-to-Speech Output")

    # Every new frame triggers OCR + TTS and refreshes all four outputs.
    cam_input.change(
        fn=extract_text_and_speak,
        inputs=cam_input,
        outputs=[mirror_view, ocr_view, text_box, speech_out],
    )

if __name__ == "__main__":
    demo.launch()