Spaces:

capitaletech
/

cv_quality

Sleeping

App Files Files Community

Nassiraaa committed on Aug 6, 2024

Commit

4dae3cd

verified ·

1 Parent(s): e892881

Update ocr_functions.py

Browse files

Files changed (1) hide show

ocr_functions.py +11 -17

ocr_functions.py CHANGED Viewed

@@ -15,10 +15,12 @@ def textract_ocr(image, box):
     img_bytes = io.BytesIO()
     cropped_image.save(img_bytes, format='PNG')
     img_bytes = img_bytes.getvalue()
-    client = boto3.client('textract', region_name='eu-west-3', aws_access_key_id=os.getenv("aws_access_key_id"),
-                          aws_secret_access_key=os.getenv('aws_secret_access_key')
-    )
     response = client.detect_document_text(Document={'Bytes': img_bytes})
     blocks = response['Blocks']
     texttract = ""
@@ -26,38 +28,30 @@ def textract_ocr(image, box):
     for block in blocks:
         if(block['BlockType'] == 'LINE'):
             line_confidence[block['Text']] = block['Confidence']
-            texttract+= block['Text']+"\n"
-    return texttract
-def paddle_ocr(image,box):
     x1, y1, x2, y2 = box
     cropped_image = image.crop((x1, y1, x2, y2))
     cropped_image = np.array(cropped_image)
     ocr = PaddleOCR(use_angle_cls=False, lang='latin')
     result = ocr.ocr(cropped_image, cls=False)
-    text= ""
-    if result [0] != None:
         result.sort(key=lambda x: (x[0][0][1], x[0][0][0]))
         text = [x[1][0] for x in result[0]]
     return "\n".join(text)
 def tesseract_ocr(image, box):
     target_dpi = 300
     x1, y1, x2, y2 = box
     cropped_image = image.crop((x1, y1, x2, y2))
     cropped_image = cropped_image.convert("L")
     current_dpi = cropped_image.info['dpi'][0] if 'dpi' in image.info else None
     if current_dpi:
         scale_factor = target_dpi / current_dpi
     else:
         scale_factor = 1.0
     binarized_image = cropped_image.filter(ImageFilter.MedianFilter())
     binarized_image = binarized_image.point(lambda p: p > 180 and 255)

     img_bytes = io.BytesIO()
     cropped_image.save(img_bytes, format='PNG')
     img_bytes = img_bytes.getvalue()
+    client = boto3.client('textract',
+                          region_name='eu-west-3',
+                          aws_access_key_id=os.getenv("aws_access_key_id"),
+                          aws_secret_access_key=os.getenv('aws_secret_access_key'))
     response = client.detect_document_text(Document={'Bytes': img_bytes})
     blocks = response['Blocks']
     texttract = ""
     for block in blocks:
         if(block['BlockType'] == 'LINE'):
             line_confidence[block['Text']] = block['Confidence']
+            texttract += block['Text'] + "\n"
+    return texttract
+def paddle_ocr(image, box):
     x1, y1, x2, y2 = box
     cropped_image = image.crop((x1, y1, x2, y2))
     cropped_image = np.array(cropped_image)
     ocr = PaddleOCR(use_angle_cls=False, lang='latin')
     result = ocr.ocr(cropped_image, cls=False)
+    text = ""
+    if result[0] is not None:
         result.sort(key=lambda x: (x[0][0][1], x[0][0][0]))
         text = [x[1][0] for x in result[0]]
     return "\n".join(text)
 def tesseract_ocr(image, box):
     target_dpi = 300
     x1, y1, x2, y2 = box
     cropped_image = image.crop((x1, y1, x2, y2))
     cropped_image = cropped_image.convert("L")
     current_dpi = cropped_image.info['dpi'][0] if 'dpi' in image.info else None
     if current_dpi:
         scale_factor = target_dpi / current_dpi
     else:
         scale_factor = 1.0
     binarized_image = cropped_image.filter(ImageFilter.MedianFilter())
     binarized_image = binarized_image.point(lambda p: p > 180 and 255)