import gradio as gr
import cv2
import numpy as np
from PIL import Image
import pytesseract
from gtts import gTTS
import tempfile
import os
def preprocess(image):
    """Prepare a webcam frame for OCR.

    Parameters
    ----------
    image : PIL.Image
        Frame from the Gradio webcam component (RGB channel order).

    Returns
    -------
    tuple
        ``(thresh, img)`` where ``thresh`` is the binarized uint8 image
        fed to Tesseract and ``img`` is the mirror-corrected RGB frame
        used for the on-screen preview.
    """
    img = np.array(image)   # PIL -> ndarray is RGB, not BGR
    img = cv2.flip(img, 1)  # undo the webcam mirror so text reads left-to-right
    # BUG FIX: the array came from PIL and is RGB; the original used
    # COLOR_BGR2GRAY, which applies the luminance weights to the wrong
    # channels. Use RGB2GRAY for a correct grayscale conversion.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # Sharpen with a 3x3 kernel to crisp up glyph edges before OCR.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    gray = cv2.filter2D(gray, -1, kernel)
    # Denoise after sharpening so the amplified sensor noise is removed.
    gray = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
    # Adaptive threshold copes with uneven lighting across the page.
    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31, 10
    )
    return thresh, img
def extract_text_and_speak(image):
    """Run OCR on a webcam frame and synthesize speech from the result.

    Parameters
    ----------
    image : PIL.Image
        Frame from the Gradio webcam component.

    Returns
    -------
    tuple
        ``(preview_pil, processed_pil, text, audio_file)`` — the
        mirror-corrected preview, the binarized OCR input, the extracted
        text (or a placeholder message), and the path to an MP3 of the
        spoken text, or ``None`` when nothing readable was found.
    """
    processed, flipped_preview = preprocess(image)
    processed_pil = Image.fromarray(processed)
    preview_pil = Image.fromarray(flipped_preview)

    text = pytesseract.image_to_string(processed, lang="eng").strip()

    # FIX: the original also compared freshly-OCR'd text against the
    # placeholder string, a condition that could never be true at that
    # point, and applied the empty-text fallback in a second `if` after
    # the TTS branch. One guard expresses the same behavior.
    if text:
        # gTTS writes to a file; delete=False keeps the temp file alive
        # so Gradio can stream it back to the client afterwards.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
            gTTS(text).save(tmpfile.name)
            audio_file = tmpfile.name
    else:
        text = "No readable text found."
        audio_file = None

    return preview_pil, processed_pil, text, audio_file
# ---- Gradio UI wiring ----
with gr.Blocks() as demo:
    gr.Markdown("## GabAI - AI Assistive Reading System")
    with gr.Row():
        # Webcam input; frames are delivered to callbacks as PIL images.
        webcam = gr.Image(
            type="pil",
            sources=["webcam"],
            label="Live Webcam (mirrored)"
        )
    # Output panels: corrected frame, binarized OCR input, text, speech.
    corrected_preview = gr.Image(type="pil", label="Mirror-Corrected Preview")
    processed_preview = gr.Image(type="pil", label="Processed Preview for OCR")
    ocr_text = gr.Textbox(label="Extracted Text")
    audio_output = gr.Audio(label="Text-to-Speech Output")
    # Re-run the OCR + TTS pipeline each time the webcam frame changes.
    webcam.change(
        fn=extract_text_and_speak,
        inputs=webcam,
        outputs=[corrected_preview, processed_preview, ocr_text, audio_output]
    )

# Script entry point (how Hugging Face Spaces launches the app).
if __name__ == "__main__":
    demo.launch()