Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,10 @@ import os
|
|
| 8 |
|
| 9 |
def preprocess(image):
|
| 10 |
"""
|
| 11 |
-
Preprocess the image for OCR
|
|
|
|
|
|
|
|
|
|
| 12 |
"""
|
| 13 |
img = np.array(image)
|
| 14 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
@@ -20,33 +23,34 @@ def preprocess(image):
|
|
| 20 |
return thresh
|
| 21 |
|
| 22 |
def extract_and_speak(image):
|
|
|
|
|
|
|
|
|
|
| 23 |
processed = preprocess(image)
|
| 24 |
text = pytesseract.image_to_string(processed, lang="eng")
|
|
|
|
| 25 |
if text.strip() == "":
|
| 26 |
return "No readable text found.", None
|
|
|
|
| 27 |
tts = gTTS(text)
|
| 28 |
tts.save("output.mp3")
|
|
|
|
| 29 |
return text, "output.mp3"
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
)
|
| 37 |
-
|
| 38 |
-
# Webcam input (sources instead of source)
|
| 39 |
-
webcam = gr.Image(type="pil", sources=["webcam"])
|
| 40 |
-
|
| 41 |
-
# Mirror the preview using CSS (client-side only)
|
| 42 |
-
webcam.style(**{"transform": "scaleX(-1)"})
|
| 43 |
-
|
| 44 |
-
# Outputs
|
| 45 |
-
text_output = gr.Textbox(label="Extracted Text")
|
| 46 |
-
audio_output = gr.Audio(label="Text-to-Speech Output")
|
| 47 |
-
|
| 48 |
-
# Button to process
|
| 49 |
-
submit = gr.Button("Read Text")
|
| 50 |
-
submit.click(fn=extract_and_speak, inputs=webcam, outputs=[text_output, audio_output])
|
| 51 |
|
| 52 |
-
|
|
|
|
|
|
| 8 |
|
| 9 |
def preprocess(image):
|
| 10 |
"""
|
| 11 |
+
Preprocess the image for OCR:
|
| 12 |
+
- Convert to grayscale
|
| 13 |
+
- Apply Gaussian blur
|
| 14 |
+
- Apply Otsu threshold
|
| 15 |
"""
|
| 16 |
img = np.array(image)
|
| 17 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
|
|
| 23 |
return thresh
|
| 24 |
|
| 25 |
def extract_and_speak(image):
    """Extract text from an image via OCR and convert it to speech.

    Args:
        image: PIL image captured from the Gradio webcam input.

    Returns:
        A ``(text, audio_path)`` tuple: the OCR'd text and the path to an
        MP3 file of the spoken text, or ``("No readable text found.", None)``
        when OCR produces no usable text.
    """
    processed = preprocess(image)
    text = pytesseract.image_to_string(processed, lang="eng")

    if not text.strip():
        return "No readable text found.", None

    # Write the audio to a unique temp file rather than a fixed
    # "output.mp3": concurrent Gradio requests would otherwise overwrite
    # each other's output and could serve stale audio.
    import tempfile
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # gTTS reopens the path itself; release our handle first

    tts = gTTS(text)
    tts.save(audio_path)

    return text, audio_path
|
| 39 |
|
| 40 |
+
# Gradio UI: webcam capture in -> (extracted text, spoken audio) out.
interface = gr.Interface(
    fn=extract_and_speak,
    inputs=gr.Image(type="pil", sources=["webcam"]),  # Webcam only
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Audio(label="Text-to-Speech Output"),
    ],
    title="GabAI - AI Assistive Reading System",
    description=(
        "Use your webcam to capture printed text. "
        "The system extracts the text and converts it into speech."
    ),
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when executed as a script,
    # not when this module is imported.
    interface.launch()