Spaces:
Sleeping
Sleeping
Fix bugs
Browse files
app.py
CHANGED
|
@@ -7,13 +7,8 @@ from gtts import gTTS
|
|
| 7 |
import io
|
| 8 |
|
| 9 |
def preprocess(image):
|
| 10 |
-
"""
|
| 11 |
-
Preprocess the image for OCR:
|
| 12 |
-
- Flip horizontally to correct mirror
|
| 13 |
-
- Grayscale, sharpen, denoise, threshold
|
| 14 |
-
"""
|
| 15 |
img = np.array(image)
|
| 16 |
-
img = cv2.flip(img, 1) #
|
| 17 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 18 |
kernel = np.array([[0,-1,0], [-1,5,-1], [0,-1,0]])
|
| 19 |
gray = cv2.filter2D(gray, -1, kernel)
|
|
@@ -24,26 +19,26 @@ def preprocess(image):
|
|
| 24 |
cv2.THRESH_BINARY,
|
| 25 |
31, 10
|
| 26 |
)
|
| 27 |
-
return thresh, img
|
| 28 |
|
| 29 |
def extract_text_and_speak(image):
|
| 30 |
-
"""
|
| 31 |
-
Runs OCR and TTS on the captured image
|
| 32 |
-
"""
|
| 33 |
processed, flipped_preview = preprocess(image)
|
| 34 |
processed_pil = Image.fromarray(processed)
|
| 35 |
-
preview_pil = Image.fromarray(flipped_preview)
|
| 36 |
|
| 37 |
text = pytesseract.image_to_string(processed, lang="eng").strip()
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
else:
|
| 42 |
tts = gTTS(text)
|
| 43 |
buffer = io.BytesIO()
|
| 44 |
tts.write_to_fp(buffer)
|
| 45 |
buffer.seek(0)
|
| 46 |
audio_file = buffer
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
return preview_pil, processed_pil, text, audio_file
|
| 49 |
|
|
@@ -53,10 +48,9 @@ with gr.Blocks() as demo:
|
|
| 53 |
with gr.Row():
|
| 54 |
webcam = gr.Image(
|
| 55 |
type="pil",
|
| 56 |
-
sources=["webcam"],
|
| 57 |
label="Live Webcam (mirrored)"
|
| 58 |
)
|
| 59 |
-
# Show mirror-corrected preview for the user
|
| 60 |
corrected_preview = gr.Image(type="pil", label="Mirror-Corrected Preview")
|
| 61 |
|
| 62 |
processed_preview = gr.Image(type="pil", label="Processed Preview for OCR")
|
|
|
|
| 7 |
import io
|
| 8 |
|
| 9 |
def preprocess(image):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
img = np.array(image)
|
| 11 |
+
img = cv2.flip(img, 1) # Mirror-correct
|
| 12 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 13 |
kernel = np.array([[0,-1,0], [-1,5,-1], [0,-1,0]])
|
| 14 |
gray = cv2.filter2D(gray, -1, kernel)
|
|
|
|
| 19 |
cv2.THRESH_BINARY,
|
| 20 |
31, 10
|
| 21 |
)
|
| 22 |
+
return thresh, img
|
| 23 |
|
| 24 |
def extract_text_and_speak(image):
|
|
|
|
|
|
|
|
|
|
| 25 |
processed, flipped_preview = preprocess(image)
|
| 26 |
processed_pil = Image.fromarray(processed)
|
| 27 |
+
preview_pil = Image.fromarray(flipped_preview)
|
| 28 |
|
| 29 |
text = pytesseract.image_to_string(processed, lang="eng").strip()
|
| 30 |
+
|
| 31 |
+
# Create TTS only if text exists
|
| 32 |
+
if text and text != "No readable text found.":
|
|
|
|
| 33 |
tts = gTTS(text)
|
| 34 |
buffer = io.BytesIO()
|
| 35 |
tts.write_to_fp(buffer)
|
| 36 |
buffer.seek(0)
|
| 37 |
audio_file = buffer
|
| 38 |
+
else:
|
| 39 |
+
audio_file = None
|
| 40 |
+
if not text:
|
| 41 |
+
text = "No readable text found."
|
| 42 |
|
| 43 |
return preview_pil, processed_pil, text, audio_file
|
| 44 |
|
|
|
|
| 48 |
with gr.Row():
|
| 49 |
webcam = gr.Image(
|
| 50 |
type="pil",
|
| 51 |
+
sources=["webcam"],
|
| 52 |
label="Live Webcam (mirrored)"
|
| 53 |
)
|
|
|
|
| 54 |
corrected_preview = gr.Image(type="pil", label="Mirror-Corrected Preview")
|
| 55 |
|
| 56 |
processed_preview = gr.Image(type="pil", label="Processed Preview for OCR")
|