MeasurementTesting

Sleeping

App Files Files Community

MeasurementTesting / Counting_Columns_2_1.py

Marthee

Update Counting_Columns_2_1.py

86f6cb5 verified about 1 year ago

raw

history blame

4.9 kB

	import cv2
	import numpy as np
	import pandas as pd
	import statistics
	from statistics import mode
	from PIL import Image
	import io
	import pypdfium2 as pdfium
	import fitz # PyMuPDF
	import os

	def get_text_from_pdf(input_pdf_path):
	    """Return the word-level text entries of the last page of a PDF.

	    Args:
	        input_pdf_path: Passed as the second positional argument of
	            ``fitz.open`` with ``'pdf'`` as the first.
	            NOTE(review): in PyMuPDF that slot is the in-memory stream, so
	            this is presumably raw PDF bytes rather than a path - confirm
	            against the caller.

	    Returns:
	        The result of ``page.get_text("words")`` for the last page visited,
	        or an empty list when the document has no pages.
	    """
	    # Pre-bind the result so a document without pages yields an empty list
	    # instead of an unbound-name error at the return statement.
	    text_instances = []
	    # The context manager closes the document once the words are extracted.
	    with fitz.open('pdf', input_pdf_path) as pdf_document:
	        for page in pdf_document:
	            # Each iteration overwrites the previous page's words, so only
	            # the final page's entries survive the loop.
	            # NOTE(review): convert2img renders page 0 only; for multi-page
	            # input the words and the image may come from different pages.
	            text_instances = page.get_text("words")

	            # NOTE(review): no redaction annotations are added here, so this
	            # call looks like a no-op - confirm before removing it.
	            page.apply_redactions()
	    return text_instances

	def convert2img(path):
	    """Render the first page of a PDF and return it as a BGR image array.

	    Args:
	        path: Whatever ``pdfium.PdfDocument`` accepts as its input.

	    Returns:
	        A NumPy array produced by converting the rendered page from RGB to
	        BGR channel order with ``cv2.cvtColor``.
	    """
	    pdf = pdfium.PdfDocument(path)
	    first_page = pdf.get_page(0)
	    # pypdfium2 bitmap -> PIL image -> NumPy array (treated as RGB below).
	    rgb_pixels = np.array(first_page.render().to_pil())
	    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

	def changeWhiteColumns(img):
	    """Return a copy of ``img`` with near-white pixels painted (255, 0, 0).

	    A pixel is selected when its HSV value (after a BGR->HSV conversion) has
	    H == 0, S == 0 and V between 250 and 255 inclusive. The input image is
	    left untouched; the recolouring happens on a copy.

	    Args:
	        img: Image array in the channel order expected by
	            ``cv2.COLOR_BGR2HSV``.

	    Returns:
	        The recoloured copy.
	    """
	    recolored = img.copy()
	    hsv = cv2.cvtColor(recolored, cv2.COLOR_BGR2HSV)
	    white_low = np.array([0, 0, 250])
	    white_high = np.array([0, 0, 255])
	    white_mask = cv2.inRange(hsv, white_low, white_high)
	    # (255, 0, 0) is pure blue in the BGR order assumed by the conversion.
	    recolored[white_mask > 0] = (255, 0, 0)
	    return recolored

	def changeGrayModify(img):
	    """Paint mid-gray pixels of ``img`` with (255, 0, 0), in place.

	    A pixel is selected when its HSV value (after a BGR->HSV conversion) has
	    H == 0, S == 0 and V between 175 and 199 inclusive.

	    Args:
	        img: Image array in the channel order expected by
	            ``cv2.COLOR_BGR2HSV``. It is modified by this call.

	    Returns:
	        The same array object that was passed in, after modification.
	    """
	    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

	    gray_low = np.array([0, 0, 175])
	    gray_high = np.array([0, 0, 199])
	    gray_mask = cv2.inRange(hsv, gray_low, gray_high)

	    # Unlike changeWhiteColumns, no copy is taken: the caller's array changes.
	    img[gray_mask > 0] = (255, 0, 0)
	    return img

	def segment_blue(gray_changed):
	    """Keep only fully saturated, full-brightness pixels with hue 120-179.

	    The image is converted BGR->HSV and masked with the inclusive range
	    H in [120, 179], S == 255, V == 255. That range contains the pure
	    (255, 0, 0) BGR colour, which OpenCV's 8-bit HSV maps to H == 120.

	    Args:
	        gray_changed: Image array in the channel order expected by
	            ``cv2.COLOR_BGR2HSV``.

	    Returns:
	        A new image in which pixels outside the mask are zeroed.
	    """
	    hsv = cv2.cvtColor(gray_changed, cv2.COLOR_BGR2HSV)

	    hue_low = np.array([120, 255, 255])
	    hue_high = np.array([179, 255, 255])
	    blue_mask = cv2.inRange(hsv, hue_low, hue_high)

	    # AND-ing the image with itself under the mask copies the selected pixels.
	    return cv2.bitwise_and(gray_changed, gray_changed, mask=blue_mask)

	def segment_brown(img):
	    """Keep only the pixels whose HSV value falls in the configured range.

	    The image is converted BGR->HSV and masked with the inclusive range
	    H in [0, 81], S in [9, 255], V in [0, 255].

	    Args:
	        img: Image array in the channel order expected by
	            ``cv2.COLOR_BGR2HSV``.

	    Returns:
	        A new image in which pixels outside the mask are zeroed.
	    """
	    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
	    range_low = np.array([0, 9, 0])
	    range_high = np.array([81, 255, 255])
	    brown_mask = cv2.inRange(hsv, range_low, range_high)
	    # AND-ing the image with itself under the mask copies the selected pixels.
	    return cv2.bitwise_and(img, img, mask=brown_mask)

	def threshold(imgResult3):
	    """Binarise a BGR image with a Gaussian blur followed by Otsu's method.

	    Args:
	        imgResult3: Image array accepted by ``cv2.COLOR_BGR2GRAY``.

	    Returns:
	        The thresholded image, i.e. the second element of the tuple returned
	        by ``cv2.threshold``.
	    """
	    # 3x3 kernel with sigma 9, applied before the grayscale conversion.
	    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
	    gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
	    # With THRESH_OTSU the 0 passed as the threshold is ignored by OpenCV.
	    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
	    return binary

	def get_columns_info(outsu4, img, min_column_area=90, max_column_area=881.0 * 2):
	    """Split the external contours of a binary image by area.

	    Contours with area strictly greater than ``max_column_area`` are drawn
	    (filled, value 0) onto ``mask_walls``. Contours with area strictly
	    between ``min_column_area`` and ``max_column_area`` are drawn onto
	    ``mask_clmns`` and their centroid is recorded. A contour whose area is
	    exactly ``max_column_area`` lands in neither group.

	    Args:
	        outsu4: Binary image passed to ``cv2.findContours``.
	        img: Image whose first two shape entries size the output masks.
	        min_column_area: Exclusive lower area bound for the column group.
	        max_column_area: Exclusive upper area bound for the column group and
	            exclusive lower bound for the wall group.

	    Returns:
	        A tuple ``(p, mask_clmns, mask_walls)``: the list of integer
	        ``(x, y)`` centroids and the two uint8 masks (255 background, 0 where
	        a contour was filled).
	    """
	    mask_clmns = np.ones(img.shape[:2], dtype="uint8") * 255
	    mask_walls = np.ones(img.shape[:2], dtype="uint8") * 255
	    contours, _ = cv2.findContours(
	        image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
	    )

	    p = []  # centroid of every contour placed in the column group
	    for cnt in contours:
	        area = cv2.contourArea(cnt)
	        if area > max_column_area:
	            cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
	        elif min_column_area < area < max_column_area:
	            # maybe make it area < (881.0 * 1.5)
	            M = cv2.moments(cnt)
	            if M['m00'] == 0.0:
	                # No centroid can be computed; skip rather than divide by 0.
	                continue
	            p.append((int(M['m10'] / M['m00']), int(M['m01'] / M['m00'])))
	            cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
	    return p, mask_clmns, mask_walls

	def getTextsPoints(x):
	    """Return the ``(entry[2], entry[3])`` pair of every entry in ``x``.

	    Args:
	        x: Iterable of indexable entries with at least four items.
	            NOTE(review): callers pass PyMuPDF "words" tuples, where indices
	            2 and 3 are presumably the x1/y1 corner of the word box - confirm.

	    Returns:
	        A list of 2-tuples, one per entry, in input order.
	    """
	    return [(entry[2], entry[3]) for entry in x]


	def distance(point1, point2):
	    """Return the Euclidean distance between two 2-D points.

	    Args:
	        point1: Pair ``(x, y)``.
	        point2: Pair ``(x, y)``.

	    Returns:
	        The straight-line distance, as computed by ``np.sqrt``.
	    """
	    x1, y1 = point1
	    x2, y2 = point2
	    # The exponent operators were missing, which made the line a syntax error.
	    return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

	def getNearestText(point_list, p, max_distance=44):
	    """Find, for each point in ``p``, its closest point in ``point_list``.

	    The closest candidate is kept only when it lies strictly closer than
	    ``max_distance``; otherwise that point of ``p`` contributes nothing.

	    Args:
	        point_list: Candidate ``(x, y)`` points to pick from.
	        p: Query ``(x, y)`` points.
	        max_distance: Exclusive distance cutoff for accepting a match.

	    Returns:
	        A list of the accepted candidates, in the order of ``p``. The same
	        candidate may appear more than once. Empty when ``point_list`` is
	        empty.
	    """
	    # min() raises ValueError on an empty sequence; with no candidates there
	    # is simply nothing to match.
	    if not point_list:
	        return []

	    nearbyy = []
	    for column_point in p:
	        nearest_point = min(
	            point_list, key=lambda point: distance(point, column_point)
	        )
	        if distance(nearest_point, column_point) < max_distance:
	            nearbyy.append(nearest_point)
	    return nearbyy


	def getColumnsTypes(nearbyy, x):
	    """Collect the "C"-prefixed labels located at the given points.

	    For every point in ``nearbyy``, each entry of ``x`` whose items 2 and 3
	    equal the point's coordinates and whose item 4 starts with ``"C"``
	    contributes that item 4 to the result.

	    Args:
	        nearbyy: Sequence of ``(x, y)`` points.
	        x: Iterable of indexable entries; items 2 and 3 are compared with the
	            point and item 4 is the string label.

	    Returns:
	        A list of labels ordered by point, then by position in ``x``.
	    """
	    # Index the qualifying labels by position once, so each point is a dict
	    # lookup instead of a full scan of x.
	    labels_by_point = {}
	    for tpl in x:
	        if tpl[4].startswith("C"):
	            labels_by_point.setdefault((tpl[2], tpl[3]), []).append(tpl[4])

	    found_tuple = []
	    for point in nearbyy:
	        found_tuple.extend(labels_by_point.get((point[0], point[1]), ()))
	    return found_tuple

	def generate_legend(found_tuple):
	    """Build a frequency table of the given labels.

	    Args:
	        found_tuple: Iterable of hashable labels.

	    Returns:
	        A ``pandas.DataFrame`` with the columns ``'Column Type'`` and
	        ``'Count'``, one row per distinct label in first-seen order. The
	        frame has the two columns and no rows when the input is empty.
	    """
	    from collections import Counter

	    # Counter keeps first-seen order, matching a hand-built dict.
	    word_freq = Counter(found_tuple)
	    # Materialise the rows so DataFrame gets a plain list rather than a view.
	    rows = list(word_freq.items())
	    return pd.DataFrame(rows, columns=['Column Type', 'Count'])

	def mainfun(plan):
	    """Count the labelled columns found on a plan and return the legend.

	    The plan is rendered to an image and segmented with ``segment_brown``.
	    When that yields more than 10 column centroids they are used as-is;
	    otherwise the image is re-segmented through ``changeGrayModify`` and
	    ``segment_blue`` and those centroids are used instead. The centroids are
	    then matched to nearby PDF words and the matching labels are tallied.

	    Args:
	        plan: The PDF input, handed unchanged to both ``get_text_from_pdf``
	            and ``convert2img``.

	    Returns:
	        The DataFrame produced by ``generate_legend``.
	    """
	    min_brown_columns = 10

	    texts_from_pdf = get_text_from_pdf(plan)
	    img = convert2img(plan)

	    # BROWN COLUMNS
	    brown_binary = threshold(segment_brown(img))
	    column_points, _, _ = get_columns_info(brown_binary, img)

	    if len(column_points) <= min_brown_columns:
	        # BLUE COLUMNS
	        # changeGrayModify recolours img in place; afterwards img is only
	        # used by get_columns_info for its shape.
	        img_blue = changeGrayModify(img)
	        blue_binary = threshold(segment_blue(img_blue))
	        column_points, _, _ = get_columns_info(blue_binary, img)

	    # Both branches end with the same matching and tallying steps.
	    text_points = getTextsPoints(texts_from_pdf)
	    nearby = getNearestText(text_points, column_points)
	    columns_types = getColumnsTypes(nearby, texts_from_pdf)
	    return generate_legend(columns_types)