Arjooohn committed on
Commit
352fca9
·
verified ·
1 Parent(s): 026149f
Files changed (1) hide show
  1. app.py +11 -17
app.py CHANGED
@@ -7,13 +7,8 @@ from gtts import gTTS
7
  import io
8
 
9
  def preprocess(image):
10
- """
11
- Preprocess the image for OCR:
12
- - Flip horizontally to correct mirror
13
- - Grayscale, sharpen, denoise, threshold
14
- """
15
  img = np.array(image)
16
- img = cv2.flip(img, 1) # Correct mirror for OCR
17
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
18
  kernel = np.array([[0,-1,0], [-1,5,-1], [0,-1,0]])
19
  gray = cv2.filter2D(gray, -1, kernel)
@@ -24,26 +19,26 @@ def preprocess(image):
24
  cv2.THRESH_BINARY,
25
  31, 10
26
  )
27
- return thresh, img # Return processed for OCR and flipped image for preview
28
 
29
  def extract_text_and_speak(image):
30
- """
31
- Runs OCR and TTS on the captured image
32
- """
33
  processed, flipped_preview = preprocess(image)
34
  processed_pil = Image.fromarray(processed)
35
- preview_pil = Image.fromarray(flipped_preview) # mirror-corrected preview
36
 
37
  text = pytesseract.image_to_string(processed, lang="eng").strip()
38
- if text == "":
39
- text = "No readable text found."
40
- audio_file = None
41
- else:
42
  tts = gTTS(text)
43
  buffer = io.BytesIO()
44
  tts.write_to_fp(buffer)
45
  buffer.seek(0)
46
  audio_file = buffer
 
 
 
 
47
 
48
  return preview_pil, processed_pil, text, audio_file
49
 
@@ -53,10 +48,9 @@ with gr.Blocks() as demo:
53
  with gr.Row():
54
  webcam = gr.Image(
55
  type="pil",
56
- sources=["webcam"], # user captures frame
57
  label="Live Webcam (mirrored)"
58
  )
59
- # Show mirror-corrected preview for the user
60
  corrected_preview = gr.Image(type="pil", label="Mirror-Corrected Preview")
61
 
62
  processed_preview = gr.Image(type="pil", label="Processed Preview for OCR")
 
7
  import io
8
 
9
  def preprocess(image):
 
 
 
 
 
10
  img = np.array(image)
11
+ img = cv2.flip(img, 1) # Mirror-correct
12
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
13
  kernel = np.array([[0,-1,0], [-1,5,-1], [0,-1,0]])
14
  gray = cv2.filter2D(gray, -1, kernel)
 
19
  cv2.THRESH_BINARY,
20
  31, 10
21
  )
22
+ return thresh, img
23
 
24
  def extract_text_and_speak(image):
 
 
 
25
  processed, flipped_preview = preprocess(image)
26
  processed_pil = Image.fromarray(processed)
27
+ preview_pil = Image.fromarray(flipped_preview)
28
 
29
  text = pytesseract.image_to_string(processed, lang="eng").strip()
30
+
31
+ # Create TTS only if text exists
32
+ if text and text != "No readable text found.":
 
33
  tts = gTTS(text)
34
  buffer = io.BytesIO()
35
  tts.write_to_fp(buffer)
36
  buffer.seek(0)
37
  audio_file = buffer
38
+ else:
39
+ audio_file = None
40
+ if not text:
41
+ text = "No readable text found."
42
 
43
  return preview_pil, processed_pil, text, audio_file
44
 
 
48
  with gr.Row():
49
  webcam = gr.Image(
50
  type="pil",
51
+ sources=["webcam"],
52
  label="Live Webcam (mirrored)"
53
  )
 
54
  corrected_preview = gr.Image(type="pil", label="Mirror-Corrected Preview")
55
 
56
  processed_preview = gr.Image(type="pil", label="Processed Preview for OCR")