Tameem_LatestDuplicate_Working

Paused

App Files Files Community

Tameem_LatestDuplicate_Working / Counting_Columns_2_1.py

Marthee

Update Counting_Columns_2_1.py

3f77f88 verified 12 months ago

raw

history blame contribute delete

11.9 kB

	import cv2
	import numpy as np
	import pandas as pd
	import statistics
	from statistics import mode
	from PIL import Image
	import io
	import google_sheet_Legend
	import pypdfium2 as pdfium
	import fitz # PyMuPDF
	import os
	import random

	def get_text_from_pdf(input_pdf_path):
	    """Extract the word tuples of a PDF with PyMuPDF.

	    ``input_pdf_path`` is handed to ``fitz.open`` as its second positional
	    argument, after the literal ``'pdf'`` -- presumably it carries the PDF
	    content rather than a filesystem path; verify against callers.

	    Every page is visited in order and the value of
	    ``page.get_text("words")`` from the last visited page is returned.
	    An empty list is returned when the document has no pages.
	    """
	    # Default keeps the return value defined when the loop never runs.
	    text_instances = []
	    # The context manager closes the document once the words are read.
	    with fitz.open('pdf', input_pdf_path) as pdf_document:
	        for page in pdf_document:
	            text_instances = page.get_text("words")
	            # Kept from the original flow; no redaction annotations are
	            # added anywhere in this function.
	            page.apply_redactions()
	    return text_instances

	def convert2img(path):
	    """Render page 0 of the PDF given by ``path`` and return it as a BGR array.

	    The page is rendered through pypdfium2, converted to a PIL image, turned
	    into a NumPy array and finally converted from RGB to BGR channel order.
	    """
	    document = pdfium.PdfDocument(path)
	    first_page = document.get_page(0)
	    rgb_pixels = np.array(first_page.render().to_pil())
	    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

	def changeWhiteColumns(img):
	    """Return a copy of ``img`` with near-white pixels recoloured to (255, 0, 0).

	    A pixel is selected when its HSV value (after a BGR-to-HSV conversion)
	    lies between (0, 0, 250) and (0, 0, 255) inclusive. The input array is
	    not modified.
	    """
	    recoloured = img.copy()
	    white_mask = cv2.inRange(
	        cv2.cvtColor(recoloured, cv2.COLOR_BGR2HSV),
	        np.array([0, 0, 250]),
	        np.array([0, 0, 255]),
	    )
	    recoloured[white_mask > 0] = (255, 0, 0)
	    return recoloured

	def changeGrayModify(img):
	    """Recolour gray pixels of ``img`` to (255, 0, 0) and return ``img``.

	    A pixel is selected when its HSV value (after a BGR-to-HSV conversion)
	    lies between (0, 0, 175) and (0, 0, 199) inclusive.

	    NOTE: the input array is modified in place; the returned object is the
	    same array that was passed in.
	    """
	    lower_gray = np.array([0, 0, 175])
	    upper_gray = np.array([0, 0, 199])
	    gray_mask = cv2.inRange(cv2.cvtColor(img, cv2.COLOR_BGR2HSV), lower_gray, upper_gray)
	    img[gray_mask > 0] = (255, 0, 0)
	    return img

	def segment_blue(gray_changed):
	    """Keep only the pixels whose HSV value falls in the configured range.

	    The range is hue 120..179 with saturation and value both exactly 255.
	    Pixels outside the range come out as zero because the image is AND-ed
	    with itself under the range mask.
	    """
	    hsv_image = cv2.cvtColor(gray_changed, cv2.COLOR_BGR2HSV)
	    in_range = cv2.inRange(
	        hsv_image,
	        np.array([120, 255, 255]),
	        np.array([179, 255, 255]),
	    )
	    return cv2.bitwise_and(gray_changed, gray_changed, mask=in_range)

	def segment_brown(img):
	    """Keep only the pixels whose HSV value falls in the configured range.

	    The range is hue 0..81, saturation 9..255 and value 0..255. Pixels
	    outside the range come out as zero because the image is AND-ed with
	    itself under the range mask.
	    """
	    hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
	    in_range = cv2.inRange(
	        hsv_image,
	        np.array([0, 9, 0]),
	        np.array([81, 255, 255]),
	    )
	    return cv2.bitwise_and(img, img, mask=in_range)

	def threshold(imgResult3):
	    """Binarise a BGR image: 3x3 Gaussian blur, grayscale, then Otsu threshold.

	    Returns the thresholded image (the second element of the
	    ``cv2.threshold`` result).
	    """
	    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
	    grayscale = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
	    _, binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
	    return binary

	def get_columns_info(outsu4, img):
	    """Classify the external contours of a binary image by area.

	    Parameters:
	        outsu4: binary image passed to ``cv2.findContours``.
	        img: image whose first two shape dimensions size the output masks.

	    Returns:
	        A tuple ``(p, mask_clmns, mask_walls)`` where ``p`` is the list of
	        integer centroids ``(x, y)`` of contours whose area lies strictly
	        between 90 and 1762, ``mask_clmns`` is a 255-filled uint8 mask with
	        those contours filled with 0, and ``mask_walls`` is a 255-filled
	        uint8 mask with contours of area above 1762 filled with 0.

	    Contours with a zero ``m00`` moment are skipped. A contour whose area is
	    exactly 1762 lands in neither group, as in the original comparisons.
	    """
	    wall_area = 881.0 * 2   # areas above this are drawn on the wall mask
	    min_column_area = 90    # areas at or below this are ignored
	    mask_clmns = np.ones(img.shape[:2], dtype="uint8") * 255
	    mask_walls = np.ones(img.shape[:2], dtype="uint8") * 255
	    contours, _ = cv2.findContours(image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE)
	    p = []  # centroids of the contours kept as columns
	    for cnt in contours:
	        M = cv2.moments(cnt)
	        if M['m00'] == 0.0:
	            # No centroid can be computed for a degenerate contour.
	            continue
	        area = cv2.contourArea(cnt)
	        if area > wall_area:
	            cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
	        elif min_column_area < area < wall_area:
	            x1 = int(M['m10'] / M['m00'])
	            y1 = int(M['m01'] / M['m00'])
	            p.append((x1, y1))
	            cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
	    return p, mask_clmns, mask_walls

	def getTextsPoints(x):
	    """Build midpoint lookups for a sequence of word records.

	    Each record ``h`` is read as ``h[0]..h[3]`` (box coordinates) and
	    ``h[4]`` (the text). The midpoint is computed with the coordinates
	    swapped -- ``calculate_midpoint(h[1], h[0], h[3], h[2])`` -- so the
	    resulting tuple is ordered (mid of h[1]/h[3], mid of h[0]/h[2]).

	    Returns:
	        ``(point_list, pt_clm)``: the list of midpoints in input order, and
	        a dict mapping each midpoint to its text. When two records share a
	        midpoint the later text replaces the earlier one in the dict.
	    """
	    point_list = []
	    pt_clm = {}
	    for h in x:
	        # Compute the midpoint once and reuse it for both containers.
	        midpoint = calculate_midpoint(h[1], h[0], h[3], h[2])
	        point_list.append(midpoint)
	        pt_clm[midpoint] = h[4]
	    return point_list, pt_clm

	def fix_90_ky_val(pt_clm, derotationMatrix):
	    """Return a copy of ``pt_clm`` whose keys are mapped through ``derotationMatrix``.

	    Each key is turned into a ``fitz.Point``, multiplied by the matrix, and
	    re-keyed as ``(int(y), int(x))`` of the transformed point. Values are
	    carried over unchanged.
	    """
	    remapped = {}
	    for original_key, label in pt_clm.items():
	        moved = fitz.Point(original_key[0], original_key[1]) * derotationMatrix
	        remapped[(int(moved.y), int(moved.x))] = label
	    return remapped

	def calculate_midpoint(x1,y1,x2,y2):
	    """Return the midpoint of (x1, y1) and (x2, y2) as a tuple of ints.

	    Each coordinate is averaged with true division and then truncated
	    toward zero by ``int``.
	    """
	    return int((x1 + x2) / 2), int((y1 + y2) / 2)

	def getColumnsTypesKeyValue(nearbyy, pt_clm):
	    """Look up the text stored in ``pt_clm`` for every point in ``nearbyy``.

	    Returns a list with one entry per point, in the same order. Raises
	    ``KeyError`` if a point is not a key of ``pt_clm``.
	    """
	    return [pt_clm[point] for point in nearbyy]

	def fix_rotation_90(pc_coordinates, derotationMatrix):
	    """Map every coordinate through ``derotationMatrix``.

	    Each coordinate becomes a ``fitz.Point``, is multiplied by the matrix,
	    and is emitted as ``(int(y), int(x))`` of the transformed point. The
	    output list keeps the input order.
	    """
	    transformed = (
	        fitz.Point(coordinate[0], coordinate[1]) * derotationMatrix
	        for coordinate in pc_coordinates
	    )
	    return [(int(point.y), int(point.x)) for point in transformed]

	def distance(point1, point2):
	    """Return the Euclidean distance between two 2-D points.

	    Both arguments are unpacked as ``(x, y)`` pairs. The result comes from
	    ``np.sqrt`` applied to the sum of the squared coordinate differences.
	    """
	    x1, y1 = point1
	    x2, y2 = point2
	    # The exponent operators were missing in the source, which made this
	    # line a syntax error.
	    return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

	def getNearestText(point_list, p, max_distance=44):
	    """Pair every column point with its nearest text point, if close enough.

	    Parameters:
	        point_list: candidate text points.
	        p: column points to match.
	        max_distance: a pair is kept only when the distance between the
	            column point and its nearest text point is strictly below this
	            value (default 44, the previously hard-coded threshold).

	    Returns:
	        ``(nearbyy, selected_clm_point, txt_clmn)``: the matched text
	        points, the matched column points, and ``(text_point,
	        column_point)`` pairs, all in the order of ``p``. All three are
	        empty when ``point_list`` is empty.
	    """
	    nearbyy = []
	    selected_clm_point = []  # column points that received a match
	    txt_clmn = []
	    if len(point_list) == 0:
	        # min() over an empty sequence would raise ValueError.
	        return nearbyy, selected_clm_point, txt_clmn
	    for column_point in p:
	        nearest_point = min(point_list, key=lambda point: distance(point, column_point))
	        if distance(nearest_point, column_point) < max_distance:
	            nearbyy.append(nearest_point)
	            selected_clm_point.append(column_point)
	            txt_clmn.append((nearest_point, column_point))
	    return nearbyy, selected_clm_point, txt_clmn


	def getColumnsTypes(nearbyy, x):
	    """Collect the texts of records that match a nearby point and start with "C".

	    For every point in ``nearbyy`` (outer order) and every record ``tpl`` in
	    ``x`` (inner order), ``tpl[4]`` is collected when ``tpl[2]`` and
	    ``tpl[3]`` equal the point's two components and ``tpl[4]`` starts with
	    the letter "C".
	    """
	    return [
	        tpl[4]
	        for point in nearbyy
	        for tpl in x
	        if tpl[2] == point[0] and tpl[3] == point[1] and tpl[4].startswith("C")
	    ]

	def generate_legend(found_tuple):
	    """Count how often each word occurs and return the counts as a DataFrame.

	    The frame has the columns ``'Column Type'`` and ``'Count'``, with one
	    row per distinct word in order of first appearance. An empty input
	    yields a frame with those columns and no rows.
	    """
	    from collections import Counter

	    word_freq = Counter(found_tuple)
	    return pd.DataFrame(list(word_freq.items()), columns=['Column Type', 'Count'])

	def color_groups(txtpts_ky_vlu):
	    """Assign a distinct random RGB colour to every distinct value of the dict.

	    Returns a dict mapping each distinct value of ``txtpts_ky_vlu`` to an
	    ``(r, g, b)`` tuple of integers in 0..255. Colours are drawn with the
	    ``random`` module; a draw that repeats an already assigned colour is
	    discarded and redrawn so that no two labels share a colour.
	    """
	    def generate_rgb():
	        return (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))  # RGB tuple

	    key_colors = {}
	    used_colors = set()
	    for label in set(txtpts_ky_vlu.values()):
	        color = generate_rgb()
	        # Independent draws can collide; retry to keep colours distinct.
	        while color in used_colors:
	            color = generate_rgb()
	        used_colors.add(color)
	        key_colors[label] = color
	    return key_colors

	def get_drawing_info(txt_clmn,txtpts_ky_vlu,key_colors):
	    """Attach the word and its colour to every (text point, column point) pair.

	    For each pair in ``txt_clmn`` the word is read from ``txtpts_ky_vlu``
	    using the text point, and the colour from ``key_colors`` using that
	    word. Returns a list of ``(text_location, column_location, word,
	    color)`` tuples in input order.
	    """
	    labelled = (
	        (text_location, column_location, txtpts_ky_vlu[text_location])
	        for text_location, column_location in txt_clmn
	    )
	    return [
	        (text_location, column_location, word, key_colors[word])
	        for text_location, column_location, word in labelled
	    ]
	'''def add_annotations_to_pdf(image, pdf_name, slctd_clm, columns_types_v):
	image_width = image.shape[1]
	image_height = image.shape[0]
	# Create a new PDF document
	pdf_document = fitz.open('pdf',pdf_name)
	page=pdf_document[0]
	rotationOld=page.rotation
	derotationMatrix=page.derotation_matrix
	if page.rotation!=0:
	rotationangle = page.rotation
	page.set_rotation(0)
	for i in range(len(slctd_clm)):
	x, y = slctd_clm[i]
	p_midpoint = fitz.Point(x, y) * derotationMatrix
	text = columns_types_v[i]
	# Create an annotation (sticky note)
	annot = page.add_text_annot((p_midpoint.x, p_midpoint.y), text)
	annot.set_border(width=0.2, dashes=(1, 2)) # Optional border styling
	annot.set_colors(stroke=(1, 0, 0), fill=None) # Set the stroke color to red
	annot.update()
	page.set_rotation(rotationOld)
	return pdf_document'''

	def add_annotations_to_pdf(image, pdf_name, huge_list_clmn_clr_loc):
	    """Draw one circle annotation per matched column on page 0 of the PDF.

	    Parameters:
	        image: accepted for interface compatibility; not used here.
	        pdf_name: passed to ``fitz.open`` as its second positional argument
	            after the literal ``'pdf'``.
	        huge_list_clmn_clr_loc: iterable of ``(text_location,
	            column_location, word, color)`` tuples, where ``color`` holds
	            three 0..255 channel values.

	    Returns:
	        The opened ``fitz`` document with the annotations added. It is left
	        open for the caller.

	    The page rotation is set to 0 while annotating and restored afterwards.
	    Column locations are mapped through the page's derotation matrix, which
	    is read before the rotation is reset.
	    """
	    pdf_document = fitz.open('pdf', pdf_name)
	    page = pdf_document[0]
	    rotationOld = page.rotation
	    derotationMatrix = page.derotation_matrix
	    if rotationOld != 0:
	        page.set_rotation(0)
	    for _text_loc, column_loc, word, clr in huge_list_clmn_clr_loc:
	        x, y = column_loc
	        # PyMuPDF expects colour channels in the 0..1 range.
	        stroke = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
	        p_midpoint = fitz.Point(x, y) * derotationMatrix
	        annot = page.add_circle_annot(
	            fitz.Rect(p_midpoint.x - 10, p_midpoint.y - 10, p_midpoint.x + 10, p_midpoint.y + 10)  # Small circle
	        )
	        annot.set_colors(stroke=stroke, fill=(1, 1, 1))  # Coloured outline, white fill
	        annot.set_border(width=2)  # Border thickness
	        annot.set_opacity(1)  # Fully visible
	        # set_info takes keyword fields. The earlier positional calls such as
	        # set_info("subject", "Count") bound the second string to `content`,
	        # so subject and title were never written.
	        annot.set_info(content=word, title=word, subject="Count")
	        annot.update()  # Apply changes
	    page.set_rotation(rotationOld)
	    return pdf_document

	def mainfun(pdf_name,pdfpath,planname):
	    """Detect columns on page 0 of a PDF, label them, and build a legend.

	    Parameters:
	        pdf_name: PDF input, forwarded to ``fitz.open('pdf', ...)``,
	            ``get_text_from_pdf``, ``convert2img`` and
	            ``add_annotations_to_pdf``.
	        pdfpath, planname: forwarded unchanged to
	            ``google_sheet_Legend.legendGoogleSheets``.

	    Returns:
	        ``(annotatedimg, pdf_document, spreadsheet_url, list1, legend)``:
	        the BGR rendering of the annotated page, the annotated document,
	        the spreadsheet URL returned by the legend helper, a DataFrame of
	        annotation info, and the legend DataFrame.

	    Flow: columns are first searched with ``segment_brown``; when that
	    finds 10 or fewer, the search is repeated with ``changeGrayModify`` and
	    ``segment_blue``. Text coordinates are derotated only when the page
	    rotation is exactly 90.
	    """
	    # Only rotation metadata is needed from this handle, so release it early.
	    with fitz.open('pdf', pdf_name) as source_document:
	        first_page = source_document[0]
	        rotation = first_page.rotation
	        derotationMatrix = first_page.derotation_matrix
	    texts_from_pdf = get_text_from_pdf(pdf_name)
	    text_points, txtpts_ky_vlu = getTextsPoints(texts_from_pdf)
	    if rotation == 90:
	        text_points = fix_rotation_90(text_points, derotationMatrix)
	        txtpts_ky_vlu = fix_90_ky_val(txtpts_ky_vlu, derotationMatrix)

	    img = convert2img(pdf_name)
	    # First attempt: brown columns.
	    outsu = threshold(segment_brown(img))
	    column_points, _, _ = get_columns_info(outsu, img)
	    key_colors = color_groups(txtpts_ky_vlu)

	    if len(column_points) <= 10:
	        # Too few brown columns were found: fall back to blue columns.
	        img_blue = changeGrayModify(img)
	        outsu = threshold(segment_blue(img_blue))
	        column_points, _, _ = get_columns_info(outsu, img)

	    # The matching and legend steps are the same for both colour branches.
	    nearby, slctd_clm, txt_clmn = getNearestText(text_points, column_points)
	    columns_types_v = getColumnsTypesKeyValue(nearby, txtpts_ky_vlu)
	    legend = generate_legend(columns_types_v)
	    huge_list_clmn_clr_loc = get_drawing_info(txt_clmn, txtpts_ky_vlu, key_colors)

	    pdf_document = add_annotations_to_pdf(img, pdf_name, huge_list_clmn_clr_loc)
	    page = pdf_document[0]
	    pix = page.get_pixmap()  # render page to an image
	    pl = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
	    img = np.array(pl)
	    annotatedimg = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

	    legend = legend.fillna(' ')
	    gc, spreadsheet_service, spreadsheetId, spreadsheet_url, namepathArr = google_sheet_Legend.legendGoogleSheets(legend, planname, pdfpath)
	    list1 = pd.DataFrame(columns=['content', 'id', 'subject','color'])
	    for page in pdf_document:
	        for annot in page.annots():
	            annot_color = annot.colors
	            if annot_color is None:
	                continue
	            stroke_color = annot_color.get('stroke')  # Border color
	            print('strokeee',stroke_color)
	            # NOTE(review): the recorded colour is the constant [255, 0, 0],
	            # not the stroke colour read above. The row is appended for every
	            # annotation that has a colour dict -- confirm intended nesting.
	            list1.loc[len(list1)] = [annot.info['content'], annot.info['id'], annot.info['subject'], [255, 0, 0]]

	    print('list1',list1)
	    return annotatedimg, pdf_document, spreadsheet_url, list1, legend

	'''def mainfun(plan):
	texts_from_pdf = get_text_from_pdf(plan)
	img = convert2img(plan)
	imgResult = segment_brown(img)
	outsu = threshold(imgResult)
	column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
	if len(column_points) > 10:
	# BROWN COLUMNS
	text_points = getTextsPoints(texts_from_pdf)
	nearby = getNearestText(text_points, column_points)
	if rotation != 0:
	if rotation ==90:
	nearby = fix_rotation_90(pc_coordinates)
	columns_types = getColumnsTypes(nearby, texts_from_pdf)
	legend = generate_legend(columns_types)
	else:
	# BLUE COLUMNS
	img_blue = changeGrayModify(img)
	imgResult = segment_blue(img_blue)
	outsu = threshold(imgResult)
	column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
	text_points = getTextsPoints(texts_from_pdf)
	nearby = getNearestText(text_points, column_points)
	if rotation != 0:
	if rotation ==90:
	nearby = fix_rotation_90(pc_coordinates)
	columns_types = getColumnsTypes(nearby, texts_from_pdf)
	legend = generate_legend(columns_types)
	return legend'''