Spaces:
Runtime error
Runtime error
| import cv2 | |
| import pytesseract | |
| from config import PYTESSERACT_DEFAULT_CONFIG | |
| from pathlib import Path | |
| from tqdm import tqdm | |
| import numpy as np | |
| from langchain.tools import BaseTool | |
| from typing import Optional, Type | |
| from langchain.callbacks.manager import AsyncCallbackManagerForToolRun | |
| from PIL import Image | |
class ImageProcessor(BaseTool):
    """LangChain tool that preprocesses a receipt/invoice image and runs OCR on it.

    The tool entry point is :meth:`_run`: it loads the image as grayscale,
    applies a linear contrast stretch to fade watermarks and passes the
    result to Tesseract. The remaining helpers (``deskew``, ``dilate_erode``,
    ``detect_angle``, ``opening``) are optional preprocessing steps that are
    not part of the default pipeline.
    """

    # The LangChain custom-tool documentation declares these fields with
    # explicit ``str`` annotations; pydantic v2 based releases reject
    # un-annotated overrides of inherited fields.
    name: str = "ImageProcessor"
    description: str = (
        "useful when you need to extract info from an image in an img_path "
        "corresponding to a receipt or invoice and tries to preprocess it "
        "returning all the text in the image using an OCR system."
    )

    def binarize(self, img_path):
        """Load the image at *img_path* and convert it to grayscale.

        Despite the name, no thresholding is performed: the Otsu threshold
        step was disabled by the original authors because it "may affect
        performance for invoices|receipts as seen in our test dataset".

        :param img_path: path of an image file readable by ``cv2.imread``
        :return: a grayscale image in format of (h, w)
        :raises ValueError: if the file is missing or cannot be decoded
        """
        img = cv2.imread(str(img_path))
        if img is None:
            # cv2.imread reports failure by returning None instead of raising,
            # which would otherwise surface as an opaque cvtColor error.
            raise ValueError(f"could not read image: {img_path!r}")
        return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    def remove_watermark(self, img, alpha=1.8, beta=-180):
        """Fade watermarks with a linear intensity transform.

        Every pixel is mapped to ``alpha * pixel + beta`` and the result is
        clipped to the 0-255 range.

        :param img: cv2 image | np.array
        :param alpha: multiplicative gain applied to each pixel
        :param beta: offset added after the gain
        :return: transformed image as a ``uint8`` array
        """
        stretched = alpha * img + beta
        return np.clip(stretched, 0, 255).astype(np.uint8)

    def deskew(self, img):
        """Rotate *img* by the angle of the minimum-area box around its non-zero pixels.

        :param img: cv2 image | np.array (assumed single-channel, with the
            text as non-zero pixels -- TODO confirm against callers)
        :return: the rotated image, same size as the input; the input itself
            when it contains no non-zero pixel
        """
        coords = np.column_stack(np.where(img > 0))
        if coords.size == 0:
            # No foreground pixel to fit a rectangle to; nothing to deskew.
            return img
        angle = cv2.minAreaRect(coords)[-1]
        # NOTE(review): this normalisation expects minAreaRect to report
        # angles in [-90, 0); the reported range differs between OpenCV
        # releases -- confirm against the installed version.
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle
        (h, w) = img.shape[:2]
        center = (w // 2, h // 2)
        rotation = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(
            img,
            rotation,
            (w, h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )

    def dilate_erode(self, img):
        """Apply a blur / dilation / erosion / bilateral-filter chain.

        :param img: cv2 image | np.array
        :return: the filtered image
        """
        kernel = np.ones((2, 1), np.uint8)
        kernel2 = np.ones((1, 1), np.uint8)
        img = cv2.blur(img, (6, 5))
        img = cv2.dilate(img, kernel, iterations=3)
        # NOTE(review): a plain tuple is passed as the erosion kernel here
        # instead of the ``kernel`` array used for the dilation above --
        # confirm this is intentional.
        img = cv2.erode(img, (2, 1), iterations=1)
        # A 1x1 blur window (and the 1x1 dilation kernel below) spans a
        # single pixel, so these two steps should leave the image unchanged.
        img = cv2.blur(img, (1, 1))
        img = cv2.bilateralFilter(img, 10, 35, 30)
        img = cv2.dilate(img, kernel2, iterations=1)
        return img

    def detect_angle(self, img_path):
        """Load an image and undo the rotation stored in its EXIF Orientation tag.

        No angle is estimated from the image content; only EXIF orientation
        values 3, 6 and 8 (pure rotations) are handled. Images without EXIF
        data or without an Orientation tag are returned unrotated.

        :param img_path: path of the image file
        :return: the upright image as a BGR array of shape (h, w, 3)
        """
        ## taken from https://stackoverflow.com/questions/13872331/rotating-an-image-with-orientation-specified-in-exif-using-python-without-pil-in
        orientation_tag = 274  # EXIF "Orientation"
        rotations = {
            3: Image.ROTATE_180,
            6: Image.ROTATE_270,
            8: Image.ROTATE_90,
        }
        with Image.open(img_path) as opened:
            # .get() avoids a KeyError when EXIF exists without Orientation.
            method = rotations.get(opened.getexif().get(orientation_tag))
            upright = opened if method is None else opened.transpose(method)
            # Force three channels so grayscale / RGBA inputs can be reversed
            # into BGR as well.
            rgb = np.array(upright.convert("RGB"))
        return rgb[:, :, ::-1]  # convert to BGR

    def opening(self, image):
        """Apply a morphological opening with a 5x5 kernel of ones.

        :param image: cv2 image | np.array
        :return: the opened image
        """
        kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    def process_image(self, img_path):
        """Run the default preprocessing: grayscale load, then watermark removal.

        :param img_path: path of the image file
        :return: preprocessed ``uint8`` image in format of (h, w)
        """
        img = self.binarize(img_path)
        return self.remove_watermark(img)

    def img_to_text(self, img, lang="spa"):
        """OCR *img* with Tesseract using ``PYTESSERACT_DEFAULT_CONFIG``.

        :param img: image accepted by ``pytesseract.image_to_string``
        :param lang: Tesseract language code (Spanish by default)
        :return: the recognised text
        """
        return pytesseract.image_to_string(
            img, lang=lang, config=PYTESSERACT_DEFAULT_CONFIG
        )

    def _run(self, img_path, save_to_disk=False):
        """Preprocess the image at *img_path* and return its OCR text.

        :param img_path: path of the image file (``str`` or ``Path``)
        :param save_to_disk: when true, also write the text to
            ``/tmp/<image stem>.txt`` and the preprocessed image to
            ``images/rotated-<image name>``
        :return: the recognised text
        """
        img = self.process_image(img_path)
        text = self.img_to_text(img)
        if save_to_disk:
            source = Path(img_path)
            # with_suffix handles every image extension, not only ".jpg".
            text_path = Path("/tmp") / source.with_suffix(".txt").name
            # Explicit encoding: OCR output may contain non-ASCII characters.
            with open(text_path, "w", encoding="utf-8") as f:
                f.write(text)
            # cv2.imwrite does not create missing directories.
            out_dir = Path("images")
            out_dir.mkdir(parents=True, exist_ok=True)
            cv2.imwrite(str(out_dir / f"rotated-{source.name}"), img)
        return text

    # as used in langchain documentation https://python.langchain.com/docs/modules/agents/tools/custom_tools
    async def _arun(
        self,
        img_path: str,
        save_to_disk=False,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        :raises NotImplementedError: always; only the synchronous path exists
        """
        raise NotImplementedError("does not support async")