# NOTE(review): removed "Spaces: Sleeping" lines — Hugging Face Spaces page
# residue from scraping, not part of the source.
# -*- coding: utf-8 -*-
"""(Deployment)2.1 counting columns.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1R2CszBuVN-Rugu8CyGQzqsdFw11E3eHN

## Libraries
"""
# from google.colab.patches import cv2_imshow
# pip install PyPDF2
# pip install PyMuPDF
# pip install pip install PyMuPDF==1.19.0
# !pip install pypdfium2
import io
import statistics
from collections import Counter
from statistics import mode

import cv2
import fitz  # PyMuPDF
import numpy as np
import pandas as pd
import pypdfium2 as pdfium
from PIL import Image

import pilecaps_adr
"""# Functions"""
def get_text_from_pdf(input_pdf_path):
    """Extract word tuples from the first page of a plan PDF.

    Parameters
    ----------
    input_pdf_path : str
        PDF file name, resolved relative to 'dropbox_plans/2.1/'.

    Returns
    -------
    list[tuple]
        PyMuPDF "words" tuples for page 0; downstream code reads items 2/3
        (bbox end coordinates) and item 4 (the word text). Page 0 is used so
        the coordinates line up with the image convert2img() renders, which
        is also page 0 — the original loop kept only the LAST page's words.
    """
    pdf_document = fitz.open('dropbox_plans/2.1/' + input_pdf_path)
    try:
        # The original apply_redactions() call was a no-op (no redaction
        # annotations were ever added) and has been dropped.
        return pdf_document[0].get_text("words")
    finally:
        pdf_document.close()  # don't leak the open file handle
def convert2img(path):
    """Rasterize page 0 of a plan PDF into a BGR OpenCV image.

    Parameters
    ----------
    path : str
        PDF file name, resolved relative to 'dropbox_plans/2.1/'.

    Returns
    -------
    numpy.ndarray
        The rendered first page as a BGR image ready for cv2 processing.
    """
    pdf = pdfium.PdfDocument('dropbox_plans/2.1/' + path)
    try:
        page = pdf.get_page(0)
        try:
            rgb_array = np.array(page.render().to_pil())
        finally:
            page.close()  # release pdfium page resources deterministically
        # PIL yields RGB channel order; OpenCV expects BGR.
        return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
    finally:
        pdf.close()  # original never closed the document (handle leak)
def segment(img):
    """Keep only pixels whose HSV colour falls inside the column colour band.

    The BGR input is converted to HSV, masked against a fixed hue/saturation
    window, and everything outside the window is blacked out.
    """
    hsv_lo = np.array([0, 9, 0])
    hsv_hi = np.array([81, 255, 255])
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    band_mask = cv2.inRange(hsv_img, hsv_lo, hsv_hi)
    return cv2.bitwise_and(img, img, mask=band_mask)
def threshold(imgResult3):
    """Binarize the colour-segmented image with Otsu's method.

    A light Gaussian blur suppresses speckle noise, then the grayscale image
    is auto-thresholded (Otsu) into a 0/255 mask.
    """
    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
    as_gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(as_gray, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary
# Deleted the image drawing
def getColumnsPoints(outsu4):
    """Locate candidate column markers in a binary image.

    Finds the external contours of the thresholded image and returns the
    centroid of each one, computed from image moments. Contours with zero
    area (m00 == 0) have no defined centroid and are skipped.

    Parameters
    ----------
    outsu4 : numpy.ndarray
        Binary (0/255) single-channel image, e.g. the output of threshold().

    Returns
    -------
    list[tuple[int, int]]
        (x, y) integer centroids, one per non-degenerate contour.
    """
    contours, _ = cv2.findContours(image=outsu4, mode=cv2.RETR_EXTERNAL,
                                   method=cv2.CHAIN_APPROX_NONE)
    centroids = []
    # The original enumerate() index was never used.
    for cnt in contours:
        m = cv2.moments(cnt)
        if m['m00'] != 0.0:
            centroids.append((int(m['m10'] / m['m00']),
                              int(m['m01'] / m['m00'])))
    return centroids
def getTextsPoints(x):
    """Project word tuples onto their (item 2, item 3) coordinate pairs.

    Each element of *x* is a PyMuPDF "words" tuple; positions 2 and 3 hold
    the bounding-box end coordinates that the nearest-text search matches
    against column centroids.
    """
    return [(word[2], word[3]) for word in x]
def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points."""
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx * dx + dy * dy)
def getNearestText(point_list, p, max_dist=44):
    """For each column centroid, find the closest text point within range.

    Parameters
    ----------
    point_list : list[tuple]
        Candidate text end-points, e.g. from getTextsPoints(). Must be
        non-empty (min() over an empty list raises, as in the original).
    p : list[tuple]
        Column centroids, e.g. from getColumnsPoints().
    max_dist : float, optional
        Maximum pixel distance for a text point to count as a column's
        label. Defaults to the previously hard-coded 44.

    Returns
    -------
    list[tuple]
        The nearest text point for each centroid that has one closer than
        *max_dist*; centroids without one contribute nothing.
    """
    nearby = []
    for centroid in p:
        nearest_point = min(point_list,
                            key=lambda point: distance(point, centroid))
        # The original also collected every distance in an unused list.
        if distance(nearest_point, centroid) < max_dist:
            nearby.append(nearest_point)
    return nearby
def getColumnsTypes(nearbyy, x):
    """Map matched text points back to their word strings.

    For each point in *nearbyy*, every word tuple in *x* whose bbox end
    coordinates (items 2 and 3) equal that point contributes its text
    (item 4), in the same outer-then-inner iteration order as before.
    """
    return [
        tpl[4]
        for pt in nearbyy
        for tpl in x
        if tpl[2] == pt[0] and tpl[3] == pt[1]
    ]
def generate_legend(found_tuple):
    """Tally column-type labels into a two-column summary DataFrame.

    Parameters
    ----------
    found_tuple : list[str]
        Column-type strings, one per detected column (may repeat).

    Returns
    -------
    pandas.DataFrame
        Columns 'Column Type' and 'Count', one row per distinct label in
        first-seen order (Counter preserves insertion order like dict,
        matching the original hand-rolled frequency loop).
    """
    word_freq = Counter(found_tuple)
    return pd.DataFrame(word_freq.items(), columns=['Column Type', 'Count'])
def mainfun(plan, pathtoplan):
    """End-to-end column-counting pipeline for a single plan PDF.

    Extracts the words and a rasterized image from the PDF, segments and
    thresholds the image to find column markers, matches each marker to the
    nearest text label, tallies the labels into a legend DataFrame, pushes
    the legend to Google Sheets via pilecaps_adr, and returns the sheet URL.

    Parameters
    ----------
    plan : str
        PDF file name (resolved under 'dropbox_plans/2.1/' by the helpers).
    pathtoplan : str
        Passed through to pilecaps_adr.legendGoogleSheets as ``pdfpath``.

    Returns
    -------
    The spreadsheet URL produced by pilecaps_adr.legendGoogleSheets.
    """
    texts_from_pdf = get_text_from_pdf(plan)
    img = convert2img(plan)
    outsu = threshold(segment(img))
    column_points = getColumnsPoints(outsu)
    text_points = getTextsPoints(texts_from_pdf)
    nearby = getNearestText(text_points, column_points)
    columns_types = getColumnsTypes(nearby, texts_from_pdf)
    legend = generate_legend(columns_types)
    # Only the URL is needed; the other four returned values (gspread
    # client, service, spreadsheet id, name paths) were unused locals.
    _, _, _, spreadsheet_url, _ = pilecaps_adr.legendGoogleSheets(
        legend, path=plan, pdfpath=pathtoplan)
    return spreadsheet_url
"""# Call"""