Spaces:

AuditEdge
/

optimised-ocr

Running

App Files Files Community

optimised-ocr / utils.py

AuditEdge

doc upload option added

f8afc9b 12 months ago

raw

history blame

3.4 kB

	import fitz
	from PIL import Image

	class doc_processing:
	    """Convert an uploaded document (PDF page or image file) into a scaled JPEG.

	    The instance stores the document's name, ID type, file type and source
	    path. ``process()`` renders/loads the document, resizes it to
	    ``TARGET_SIZE`` and writes it to
	    ``processed_images/<id_type>/<name>.jpeg``.

	    Attributes:
	        name: Base name used for the output file.
	        id_type: Document category; also the output sub-directory. The value
	            ``"gst"`` selects the third PDF page instead of the first.
	        doc_type: ``"pdf"`` routes to PDF rendering; any other value is
	            treated as an image file.
	        f_path: Path of the source file.
	    """

	    # Output size (width, height) in pixels. The original code computed
	    # ``(1000 / w) * w`` and ``(1000 / h) * h``, i.e. 1000 on both axes.
	    TARGET_SIZE = (1000, 1000)

	    def __init__(self, name, id_type, doc_type, f_path):
	        """Store the document description; no I/O happens here."""
	        self.name = name
	        self.id_type = id_type
	        self.doc_type = doc_type
	        self.f_path = f_path

	    def _page_index(self):
	        """Return the zero-based PDF page to render (2 for "gst", else 0)."""
	        return 2 if self.id_type == "gst" else 0

	    def _output_path(self):
	        """Return the relative path the processed JPEG is written to."""
	        return "processed_images/{}/{}.jpeg".format(self.id_type, self.name)

	    def _scale_and_save(self, image):
	        """Resize *image* to ``TARGET_SIZE``, save it and return the response.

	        Args:
	            image: An RGB ``PIL.Image.Image``.

	        Returns:
	            ``{"success": 200, "output_p": <output path>}``.
	        """
	        import os

	        original_width, original_height = image.size
	        print("original_width", original_width)
	        print("original_height", original_height)

	        new_width, new_height = self.TARGET_SIZE
	        print("new_width", new_width)
	        print("new_height", new_height)

	        # Image.resize returns a new image; the original code discarded the
	        # result and therefore saved the unscaled picture.
	        resized = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

	        output_path = self._output_path()
	        # Make sure processed_images/<id_type>/ exists before saving.
	        os.makedirs(os.path.dirname(output_path), exist_ok=True)
	        resized.save(output_path)
	        return {"success": 200, "output_p": output_path}

	    def pdf_to_image_scale(self):
	        """Render one page of the PDF at ``f_path`` and save it as a scaled JPEG.

	        The page is chosen by ``_page_index()``. Errors raised by PyMuPDF
	        (for example when the PDF has fewer pages than required) propagate
	        to the caller unchanged.

	        Returns:
	            ``{"success": 200, "output_p": <output path>}``.
	        """
	        # The context manager closes the document once the page is rasterised.
	        with fitz.open(self.f_path) as pdf_document:
	            page = pdf_document.load_page(self._page_index())
	            pix = page.get_pixmap()  # Render page as a pixmap (image)

	            # Convert pixmap to PIL Image while the pixmap is still alive.
	            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

	        return self._scale_and_save(image)

	    def scale_img(self):
	        """Load the image at ``f_path`` as RGB and save it as a scaled JPEG.

	        Returns:
	            ``{"success": 200, "output_p": <output path>}``.
	        """
	        # convert() yields an independent in-memory copy, so the source file
	        # handle can be released immediately.
	        with Image.open(self.f_path) as source:
	            image = source.convert("RGB")

	        return self._scale_and_save(image)

	    def process(self):
	        """Dispatch on ``doc_type`` and return the chosen method's response.

	        ``"pdf"`` goes to ``pdf_to_image_scale``; everything else goes to
	        ``scale_img``.
	        """
	        if self.doc_type == "pdf":
	            return self.pdf_to_image_scale()
	        return self.scale_img()





	# files = {
	# "aadhar_file": "/home/javmulla/model_one/test_images_aadhar/test_two.jpg",
	# "pan_file": "/home/javmulla/model_one/test_images_pan/6ea33087.jpeg",
	# "cheque_file": "/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg",
	# "gst_file": "/home/javmulla/model_one/test_images_gst/0a52fbcb_page3_image_0.jpg"
	# }


	# files = {
	# "aadhar_file": "/home/javmulla/model_one/test_images_aadhar/test_two.jpg",
	# "pan_file": "/home/javmulla/model_one/test_images_pan/6ea33087.jpeg",
	# "cheque_file": "/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg",
	# "gst_file": "test_Images_folder/gst/e.pdf"
	# }

	# Example usage (kept for reference; not executed):
	# for key, value in files.items():
	#     name = value.split("/")[-1].split(".")[0]
	#     id_type = key.split("_")[0]
	#     doc_type = value.split("/")[-1].split(".")[1]
	#     f_path = value
	#     preprocessing = doc_processing(name,id_type,doc_type,f_path)
	#     response = preprocessing.process()
	#     print("response",response)





	# id_type, doc_type, f_path