Arjooohn committed on
Commit
6516b6c
·
verified ·
1 Parent(s): 3b3a60d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -2,10 +2,11 @@ import gradio as gr
2
  import pytesseract
3
  import cv2
4
  import numpy as np
 
5
  from gtts import gTTS
 
6
 
7
  def preprocess(image):
8
- # Convert to grayscale, blur, threshold; no flipping
9
  img = np.array(image)
10
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
11
  blur = cv2.GaussianBlur(gray, (5,5), 0)
@@ -17,6 +18,7 @@ def preprocess(image):
17
 
18
  def extract_and_speak(image):
19
  processed = preprocess(image)
 
20
  text = pytesseract.image_to_string(processed, lang="eng")
21
 
22
  if text.strip() == "":
@@ -29,13 +31,13 @@ def extract_and_speak(image):
29
 
30
  interface = gr.Interface(
31
  fn=extract_and_speak,
32
- inputs=gr.Image(type="pil", sources=["webcam"]), # Webcam preview mirrored by default
33
  outputs=[
34
  gr.Textbox(label="Extracted Text"),
35
  gr.Audio(label="Text-to-Speech Output")
36
  ],
37
  title="GabAI - AI Assistive Reading System",
38
- description="Use your webcam to capture printed text. Preview is mirrored, but captured image is normal for OCR."
39
  )
40
 
41
  interface.launch()
 
2
  import pytesseract
3
  import cv2
4
  import numpy as np
5
+ from PIL import Image
6
  from gtts import gTTS
7
+ import os
8
 
9
  def preprocess(image):
 
10
  img = np.array(image)
11
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
12
  blur = cv2.GaussianBlur(gray, (5,5), 0)
 
18
 
19
  def extract_and_speak(image):
20
  processed = preprocess(image)
21
+
22
  text = pytesseract.image_to_string(processed, lang="eng")
23
 
24
  if text.strip() == "":
 
31
 
32
  interface = gr.Interface(
33
  fn=extract_and_speak,
34
+ inputs=gr.Image(type="pil", source="webcam", tool=None), # Only webcam, no upload/paste
35
  outputs=[
36
  gr.Textbox(label="Extracted Text"),
37
  gr.Audio(label="Text-to-Speech Output")
38
  ],
39
  title="GabAI - AI Assistive Reading System",
40
+ description="Use your webcam to capture printed text. The system extracts the text and converts it into speech."
41
  )
42
 
43
  interface.launch()