Arjooohn committed on
Commit
fd054be
·
verified ·
1 Parent(s): 116f556
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -6,25 +6,24 @@ import pytesseract
6
  from gtts import gTTS
7
  import io
8
 
9
def preprocess(frame):
    """Binarize a BGR webcam frame for OCR.

    Pipeline: grayscale -> sharpen -> denoise -> adaptive threshold,
    producing a high-contrast single-channel image for Tesseract.

    Args:
        frame: BGR image as a numpy array (OpenCV convention).

    Returns:
        A uint8 binary (thresholded) numpy array.
    """
    # Drop color first; OCR only needs luminance.
    mono = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # 3x3 sharpening kernel to crisp up character edges.
    sharpen = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    mono = cv2.filter2D(mono, -1, sharpen)
    # Non-local-means denoising (h=30, template window 7, search window 21).
    mono = cv2.fastNlMeansDenoising(mono, None, 30, 7, 21)
    # Adaptive Gaussian threshold copes with uneven lighting across the frame.
    return cv2.adaptiveThreshold(
        mono,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31,
        10,
    )
19
 
20
- def process_video(frame):
21
- if frame is None:
22
- return None, "No frame captured", None
23
-
24
- frame = cv2.flip(frame, 1) # mirror
25
- processed = preprocess(frame)
26
  processed_pil = Image.fromarray(processed)
27
-
28
  text = pytesseract.image_to_string(processed, lang="eng").strip()
29
  if text == "":
30
  text = "No readable text found."
@@ -35,17 +34,16 @@ def process_video(frame):
35
  tts.write_to_fp(buffer)
36
  buffer.seek(0)
37
  audio_file = buffer
38
-
39
  return processed_pil, text, audio_file
40
 
41
  with gr.Blocks() as demo:
42
- gr.Markdown("## GabAI - Real-Time OCR with Webcam")
43
 
44
  with gr.Row():
45
- webcam = gr.Video(
46
- sources=["webcam"], # corrected argument
47
- type="numpy",
48
- label="Webcam Feed"
49
  )
50
  processed_preview = gr.Image(type="pil", label="Processed Preview")
51
 
@@ -53,7 +51,7 @@ with gr.Blocks() as demo:
53
  audio_output = gr.Audio(label="Text-to-Speech Output")
54
 
55
  webcam.change(
56
- fn=process_video,
57
  inputs=webcam,
58
  outputs=[processed_preview, ocr_text, audio_output]
59
  )
 
6
  from gtts import gTTS
7
  import io
8
 
9
def preprocess(image):
    """Prepare a webcam capture for OCR.

    Mirrors the image (so it matches what the user sees), converts to
    grayscale, sharpens, denoises, and adaptively thresholds it so
    Tesseract sees high-contrast text.

    Args:
        image: PIL.Image in RGB order, as delivered by the Gradio
            webcam component (``type="pil"``).

    Returns:
        A single-channel uint8 binary numpy array.
    """
    img = np.array(image)
    img = cv2.flip(img, 1)  # mirror horizontally
    # FIX: PIL arrays are RGB, not BGR — use COLOR_RGB2GRAY so the
    # luminance weights are applied to the correct channels.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # 3x3 sharpening kernel to emphasize glyph edges.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    gray = cv2.filter2D(gray, -1, kernel)
    # Non-local-means denoising (h=30, template window 7, search window 21).
    gray = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
    # Adaptive Gaussian threshold handles uneven lighting across the frame.
    thresh = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        31, 10,
    )
    return thresh
23
 
24
+ def extract_text_and_speak(image):
25
+ processed = preprocess(image)
 
 
 
 
26
  processed_pil = Image.fromarray(processed)
 
27
  text = pytesseract.image_to_string(processed, lang="eng").strip()
28
  if text == "":
29
  text = "No readable text found."
 
34
  tts.write_to_fp(buffer)
35
  buffer.seek(0)
36
  audio_file = buffer
 
37
  return processed_pil, text, audio_file
38
 
39
  with gr.Blocks() as demo:
40
+ gr.Markdown("## GabAI - AI Assistive Reading System")
41
 
42
  with gr.Row():
43
+ webcam = gr.Image(
44
+ type="pil",
45
+ sources=["webcam"],
46
+ label="Webcam Input"
47
  )
48
  processed_preview = gr.Image(type="pil", label="Processed Preview")
49
 
 
51
  audio_output = gr.Audio(label="Text-to-Speech Output")
52
 
53
  webcam.change(
54
+ fn=extract_text_and_speak,
55
  inputs=webcam,
56
  outputs=[processed_preview, ocr_text, audio_output]
57
  )