Spaces:

kavg
/

sri-doc

Sleeping

kavg committed on Apr 30, 2024

Commit

16d7e9b

1 Parent(s): ccb5ac8

Revert "commit before changing entity merging process"

Files changed (3) hide show

main.py CHANGED Viewed

@@ -103,12 +103,19 @@ def ApplyOCR(content):
   try:
     trocr_client = ocr.TrOCRClient(config['settings'].TROCR_API_URL)
     handwritten_ocr_df = trocr_client.ocr(handwritten_imgs, image)
-  except Exception as e:
-    print(e)
     raise HTTPException(status_code=400, detail="handwritten OCR process failed")
   ocr_df = pd.concat([handwritten_ocr_df, printed_ocr_df])
-  # ocr_df = printed_ocr_df
   return ocr_df, image

   try:
     trocr_client = ocr.TrOCRClient(config['settings'].TROCR_API_URL)
     handwritten_ocr_df = trocr_client.ocr(handwritten_imgs, image)
+  except:
     raise HTTPException(status_code=400, detail="handwritten OCR process failed")
+  try:
+    jpeg_bytes = io.BytesIO()
+    printed_img.save(jpeg_bytes, format='JPEG')
+    jpeg_content = jpeg_bytes.getvalue()
+    vision_client = ocr.VisionClient(config['settings'].GCV_AUTH)
+    printed_ocr_df = vision_client.ocr(jpeg_content, printed_img)
+  except:
+    raise HTTPException(status_code=400, detail="Printed OCR process failed")
   ocr_df = pd.concat([handwritten_ocr_df, printed_ocr_df])
   return ocr_df, image

ocr.py CHANGED Viewed

@@ -1,11 +1,12 @@
 from google.cloud import vision
 from google.oauth2 import service_account
 import pandas as pd
 import json
 import numpy as np
 import io
 import requests
-from preprocess import cam_scanner_filter
 image_ext = ("*.jpg", "*.jpeg", "*.png")
@@ -22,7 +23,7 @@ class VisionClient:
         except ValueError as e:
             print("Image could not be read")
             return
-        response = self.client.document_text_detection(image, timeout=60)
         return response
     def get_response(self, content):
@@ -133,8 +134,7 @@ class TrOCRClient():
         boxObjects = []
         for i in range(len(handwritten_imgs)):
             handwritten_img = handwritten_imgs[i]
-            handwritten_img_processed = cam_scanner_filter(handwritten_img[0])
-            ocr_result = self.send_request(handwritten_img_processed)
             boxObjects.append({
                 "id": i-1,
                 "text": ocr_result,

 from google.cloud import vision
 from google.oauth2 import service_account
+from google.protobuf.json_format import MessageToJson
 import pandas as pd
 import json
 import numpy as np
+from PIL import Image
 import io
 import requests
 image_ext = ("*.jpg", "*.jpeg", "*.png")
         except ValueError as e:
             print("Image could not be read")
             return
+        response = self.client.document_text_detection(image, timeout=10)
         return response
     def get_response(self, content):
         boxObjects = []
         for i in range(len(handwritten_imgs)):
             handwritten_img = handwritten_imgs[i]
+            ocr_result = self.send_request(handwritten_img[0])
             boxObjects.append({
                 "id": i-1,
                 "text": ocr_result,

preprocess.py CHANGED Viewed

@@ -1,8 +1,5 @@
 import torch
 from transformers import AutoTokenizer
-import cv2
-from PIL import Image
-import numpy as np
 def normalize_box(box, width, height):
     return [
@@ -12,12 +9,6 @@ def normalize_box(box, width, height):
         int(1000 * (box[3] / height)),
     ]
-def cam_scanner_filter(img):
-    image1 = np.array(img)
-    img = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
-    thresh2 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY, 199, 15)
-    return Image.fromarray(thresh2)
 # class to turn the keys of a dict into attributes (thanks Stackoverflow)
 class AttrDict(dict):
     def __init__(self, *args, **kwargs):

 import torch
 from transformers import AutoTokenizer
 def normalize_box(box, width, height):
     return [
         int(1000 * (box[3] / height)),
     ]
 # class to turn the keys of a dict into attributes (thanks Stackoverflow)
 class AttrDict(dict):
     def __init__(self, *args, **kwargs):