zixuanvtzx committed on
Commit
26a4b95
·
verified ·
1 Parent(s): dcb30d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -18
app.py CHANGED
@@ -1,31 +1,26 @@
1
  import gradio as gr
 
2
  from PIL import Image
3
- import pytesseract
4
- import cv2
5
- import numpy as np
 
6
 
7
  def ocr_extract(image):
8
  if image is None:
9
- return "Please upload an image file."
10
-
11
- # Convert PIL image to OpenCV format
12
- image_np = np.array(image)
13
- image_cv = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
14
-
15
- # (Optional) Preprocess for better accuracy
16
- gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
17
- _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
18
-
19
- # OCR
20
- text = pytesseract.image_to_string(thresh)
21
- return text.strip()
22
 
23
  iface = gr.Interface(
24
  fn=ocr_extract,
25
  inputs=gr.Image(type="pil"),
26
  outputs="text",
27
- title="Bill Text Scanner (Tesseract)",
28
- description="Upload a scanned bill or screenshot to extract all readable text."
29
  )
30
 
31
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
3
  from PIL import Image
4
+
5
+ # Load the TrOCR model for printed text
6
+ processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
7
+ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
8
 
9
  def ocr_extract(image):
10
  if image is None:
11
+ return "Please upload an image."
12
+ image = image.convert("RGB")
13
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values
14
+ generated_ids = model.generate(pixel_values)
15
+ extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
16
+ return extracted_text.strip()
 
 
 
 
 
 
 
17
 
18
  iface = gr.Interface(
19
  fn=ocr_extract,
20
  inputs=gr.Image(type="pil"),
21
  outputs="text",
22
+ title="Bill OCR Scanner (Printed Text)",
23
+ description="Upload a machine-printed bill or receipt to extract text."
24
  )
25
 
26
  if __name__ == "__main__":