Spaces:
Running
Running
| import fitz | |
| from PIL import Image | |
class doc_processing:
    """Convert an uploaded document (PDF or image file) into a scaled JPEG.

    The result is written to ``processed_images/<id_type>/<name>.jpeg``.

    Attributes:
        name: Base name (no extension) used for the output file.
        id_type: Document category; used as the output sub-directory and,
            for PDFs, to choose which page is rendered ("gst" -> third page).
        doc_type: Source file type; "pdf" (any letter case) selects the PDF
            path, anything else is opened as an image.
        f_path: Path of the source file.
    """

    # Output size in pixels as (width, height). The original arithmetic
    # ``(1000 / w) * w`` always evaluated to 1000 for both sides, so the
    # de-facto target is a fixed 1000x1000 (aspect ratio is NOT preserved).
    TARGET_SIZE = (1000, 1000)

    def __init__(self, name, id_type, doc_type, f_path):
        self.name = name
        self.id_type = id_type
        self.doc_type = doc_type
        self.f_path = f_path

    def _output_path(self):
        """Return the relative path the processed JPEG is written to."""
        return "processed_images/{}/{}.jpeg".format(self.id_type, self.name)

    def _pdf_page_index(self):
        """Return the zero-based PDF page to render for this ``id_type``."""
        # "gst" documents are rendered from their third page; every other
        # id_type uses the first page.
        return 2 if self.id_type == "gst" else 0

    def _save_scaled(self, image):
        """Resize ``image`` to ``TARGET_SIZE``, save it as JPEG, and report.

        Args:
            image: An RGB ``PIL.Image.Image``.

        Returns:
            ``{"success": 200, "output_p": <output path>}``.
        """
        import os  # local import: keeps this block self-contained

        original_width, original_height = image.size
        print("original_width", original_width)
        print("original_height", original_height)
        new_width, new_height = self.TARGET_SIZE
        print("new_width", new_width)
        print("new_height", new_height)

        # Image.resize returns a NEW image; the previous code discarded the
        # result and therefore saved the unscaled original.
        scaled = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        output_path = self._output_path()
        # Saving fails if the per-id_type directory does not exist yet.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        scaled.save(output_path)
        return {"success": 200, "output_p": output_path}

    def pdf_to_image_scale(self):
        """Render one page of the PDF at ``f_path`` and save it scaled.

        Returns:
            ``{"success": 200, "output_p": <output path>}``.

        Raises:
            ValueError: If the PDF has too few pages for the page selected
                by ``id_type``.
        """
        page_num = self._pdf_page_index()
        # The context manager closes the document even if rendering fails.
        with fitz.open(self.f_path) as pdf_document:
            page_count = len(pdf_document)
            if page_num >= page_count:
                raise ValueError(
                    "page {} requested for id_type {!r} but {!r} has only "
                    "{} page(s)".format(
                        page_num + 1, self.id_type, self.f_path, page_count
                    )
                )
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap()  # Render page as a pixmap (image)
            # Convert pixmap to PIL Image while the document is still open.
            image = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
        return self._save_scaled(image)

    def scale_img(self):
        """Open the image at ``f_path``, convert to RGB, and save it scaled.

        Returns:
            ``{"success": 200, "output_p": <output path>}``.
        """
        # convert() returns an independent in-memory copy, so the source
        # file handle can be released immediately.
        with Image.open(self.f_path) as source:
            image = source.convert("RGB")
        return self._save_scaled(image)

    def process(self):
        """Dispatch to the PDF or image pipeline based on ``doc_type``.

        Returns:
            The response dict produced by the selected pipeline.
        """
        doc_type = self.doc_type
        # Case-insensitive so an upper-case ".PDF" extension is not sent to
        # the image opener.
        if isinstance(doc_type, str) and doc_type.lower() == "pdf":
            return self.pdf_to_image_scale()
        return self.scale_img()
| # files = { | |
| # "aadhar_file": "/home/javmulla/model_one/test_images_aadhar/test_two.jpg", | |
| # "pan_file": "/home/javmulla/model_one/test_images_pan/6ea33087.jpeg", | |
| # "cheque_file": "/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg", | |
| # "gst_file": "/home/javmulla/model_one/test_images_gst/0a52fbcb_page3_image_0.jpg" | |
| # } | |
| # files = { | |
| # "aadhar_file": "/home/javmulla/model_one/test_images_aadhar/test_two.jpg", | |
| # "pan_file": "/home/javmulla/model_one/test_images_pan/6ea33087.jpeg", | |
| # "cheque_file": "/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg", | |
| # "gst_file": "test_Images_folder/gst/e.pdf" | |
| # } | |
| # for key, value in files.items(): | |
| # name = value.split("/")[-1].split(".")[0] | |
| # id_type = key.split("_")[0] | |
| # doc_type = value.split("/")[-1].split(".")[1] | |
| # f_path = value | |
| # preprocessing = doc_processing(name,id_type,doc_type,f_path) | |
| # response = preprocessing.process() | |
| # print("response",response) | |
| # id_type, doc_type, f_path | |