Arjooohn committed on
Commit
8efeaf2
·
verified ·
1 Parent(s): fbbff72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -21
app.py CHANGED
@@ -8,7 +8,10 @@ import os
8
 
9
  def preprocess(image):
10
  """
11
- Preprocess the image for OCR.
 
 
 
12
  """
13
  img = np.array(image)
14
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@@ -20,33 +23,34 @@ def preprocess(image):
20
  return thresh
21
 
22
  def extract_and_speak(image):
 
 
 
23
  processed = preprocess(image)
24
  text = pytesseract.image_to_string(processed, lang="eng")
 
25
  if text.strip() == "":
26
  return "No readable text found.", None
 
27
  tts = gTTS(text)
28
  tts.save("output.mp3")
 
29
  return text, "output.mp3"
30
 
31
- with gr.Blocks() as demo:
32
- gr.Markdown("## GabAI - AI Assistive Reading System")
33
- gr.Markdown(
34
- "Use your webcam to capture printed text. The preview below is mirrored for convenience, "
35
- "but OCR uses the original image."
 
 
 
 
 
 
 
36
  )
37
-
38
- # Webcam input (sources instead of source)
39
- webcam = gr.Image(type="pil", sources=["webcam"])
40
-
41
- # Mirror the preview using CSS (client-side only)
42
- webcam.style(**{"transform": "scaleX(-1)"})
43
-
44
- # Outputs
45
- text_output = gr.Textbox(label="Extracted Text")
46
- audio_output = gr.Audio(label="Text-to-Speech Output")
47
-
48
- # Button to process
49
- submit = gr.Button("Read Text")
50
- submit.click(fn=extract_and_speak, inputs=webcam, outputs=[text_output, audio_output])
51
 
52
- demo.launch()
 
 
8
 
9
  def preprocess(image):
10
  """
11
+ Preprocess the image for OCR:
12
+ - Convert to grayscale
13
+ - Apply Gaussian blur
14
+ - Apply Otsu threshold
15
  """
16
  img = np.array(image)
17
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
23
  return thresh
24
 
25
def extract_and_speak(image):
    """
    Extract printed text from *image* with Tesseract OCR and synthesize speech.

    Parameters
    ----------
    image : PIL.Image.Image
        Frame captured by the Gradio webcam component.

    Returns
    -------
    tuple
        ``(text, "output.mp3")`` when readable text was found, otherwise
        ``("No readable text found.", None)`` so the audio output stays empty.
    """
    processed = preprocess(image)
    text = pytesseract.image_to_string(processed, lang="eng")

    # Idiomatic emptiness check: covers both "" and whitespace-only OCR output.
    if not text.strip():
        return "No readable text found.", None

    # NOTE(review): gTTS requires network access, and the fixed output path is
    # overwritten on every call — acceptable for a single-user demo, but not
    # safe for concurrent sessions.
    tts = gTTS(text)
    tts.save("output.mp3")

    return text, "output.mp3"
39
 
40
# --- Gradio UI -----------------------------------------------------------
# Webcam-only capture; the OCR result is displayed as text and read aloud.
webcam_input = gr.Image(type="pil", sources=["webcam"])  # Webcam only
text_box = gr.Textbox(label="Extracted Text")
speech_out = gr.Audio(label="Text-to-Speech Output")

interface = gr.Interface(
    fn=extract_and_speak,
    inputs=webcam_input,
    outputs=[text_box, speech_out],
    title="GabAI - AI Assistive Reading System",
    description=(
        "Use your webcam to capture printed text. "
        "The system extracts the text and converts it into speech."
    ),
)

if __name__ == "__main__":
    interface.launch()