# MeasurementTesting / Counting_Columns_2_1.py
# Marthee's picture
# Update Counting_Columns_2_1.py
# c562e63 verified
import cv2
import numpy as np
import pandas as pd
import statistics
from statistics import mode
from PIL import Image
import io
import google_sheet_Legend
import pypdfium2 as pdfium
import fitz # PyMuPDF
import os
def get_text_from_pdf(input_pdf_path):
    """Extract the word tuples from a PDF.

    Args:
        input_pdf_path: Passed to ``fitz.open`` as the second positional
            argument, with ``'pdf'`` as the first. Despite the name this is
            presumably the raw PDF content rather than a path -- confirm
            against the callers.

    Returns:
        The result of ``page.get_text("words")`` for the last page that was
        visited, or an empty list when the document has no pages.

    NOTE(review): every page is visited but only the words of the last one
    are returned, while ``mainfun`` renders and annotates page 0 only. For
    single-page documents this is the same page; confirm the intent for
    multi-page input.
    """
    pdf_document = fitz.open('pdf', input_pdf_path)
    # Start with an empty result so that a document without pages yields []
    # instead of failing on an unbound local at the return statement.
    text_instances = []
    try:
        for page_num in range(pdf_document.page_count):
            page = pdf_document[page_num]
            text_instances = page.get_text("words")
            # Kept from the original; it runs after the words were extracted
            # and the document is discarded below.
            page.apply_redactions()
    finally:
        # The document is local to this function, so release it here.
        pdf_document.close()
    return text_instances
def convert2img(path):
    """Render the first page of a PDF and return it as a BGR image array.

    Args:
        path: Handed unchanged to ``pdfium.PdfDocument``.

    Returns:
        The rendered page after conversion with ``cv2.COLOR_RGB2BGR``.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    rendered = first_page.render().to_pil()
    # The PIL image is treated as RGB; OpenCV routines in this file expect BGR.
    return cv2.cvtColor(np.array(rendered), cv2.COLOR_RGB2BGR)
def changeWhiteColumns(img):
    """Return a copy of ``img`` with near-white pixels painted (255, 0, 0).

    A pixel is selected when its HSV value (computed with
    ``cv2.COLOR_BGR2HSV``) lies between (0, 0, 250) and (0, 0, 255)
    inclusive. The input array itself is left untouched.
    """
    painted = img.copy()
    lower_white = np.array([0, 0, 250])
    upper_white = np.array([0, 0, 255])
    white_mask = cv2.inRange(
        cv2.cvtColor(painted, cv2.COLOR_BGR2HSV), lower_white, upper_white
    )
    painted[white_mask > 0] = (255, 0, 0)
    return painted
def changeGrayModify(img):
    """Paint light-gray pixels of ``img`` with (255, 0, 0), in place.

    A pixel is selected when its HSV value (computed with
    ``cv2.COLOR_BGR2HSV``) lies between (0, 0, 175) and (0, 0, 199)
    inclusive.

    Returns:
        The same array object that was passed in; no copy is made, so the
        caller's image is modified as well.
    """
    lower_gray = np.array([0, 0, 175])
    upper_gray = np.array([0, 0, 199])
    gray_mask = cv2.inRange(
        cv2.cvtColor(img, cv2.COLOR_BGR2HSV), lower_gray, upper_gray
    )
    img[gray_mask > 0] = (255, 0, 0)
    return img
def segment_blue(gray_changed):
    """Keep only the pixels of ``gray_changed`` inside the blue HSV range.

    The range is (120, 255, 255) to (179, 255, 255) inclusive, evaluated on
    the ``cv2.COLOR_BGR2HSV`` conversion of the input. Pixels outside the
    range are masked out by ``cv2.bitwise_and``.
    """
    hsv_image = cv2.cvtColor(gray_changed, cv2.COLOR_BGR2HSV)
    blue_mask = cv2.inRange(
        hsv_image,
        np.array([120, 255, 255]),
        np.array([179, 255, 255]),
    )
    return cv2.bitwise_and(gray_changed, gray_changed, mask=blue_mask)
def segment_brown(img):
    """Keep only the pixels of ``img`` inside the HSV range used for brown.

    The range is (0, 9, 0) to (81, 255, 255) inclusive, evaluated on the
    ``cv2.COLOR_BGR2HSV`` conversion of the input. Pixels outside the range
    are masked out by ``cv2.bitwise_and``.
    """
    lower_bound = np.array([0, 9, 0])
    upper_bound = np.array([81, 255, 255])
    selection = cv2.inRange(
        cv2.cvtColor(img, cv2.COLOR_BGR2HSV), lower_bound, upper_bound
    )
    return cv2.bitwise_and(img, img, mask=selection)
def threshold(imgResult3):
    """Binarise a BGR image: 3x3 Gaussian blur, grayscale, Otsu threshold.

    Returns:
        The thresholded image, i.e. the second element of the
        ``cv2.threshold`` result.
    """
    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
    grayscale = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary
def get_columns_info(outsu4, img, min_column_area=90, max_column_area=881.0 * 2):
    """Split the external contours of a binary image into columns and walls.

    Args:
        outsu4: Binary image handed to ``cv2.findContours``.
        img: Only ``img.shape[:2]`` is read, to size the two masks.
        min_column_area: Contours must have an area strictly greater than
            this to count as a column. Defaults to the original value 90.
        max_column_area: Contours with an area strictly greater than this
            are drawn as walls; columns must be strictly smaller. Defaults
            to the original value ``881.0 * 2``.

    Returns:
        A tuple ``(p, mask_clmns, mask_walls)``:
        ``p`` is the list of integer centroids ``(x, y)`` of the column
        contours; ``mask_clmns`` and ``mask_walls`` are uint8 masks that are
        255 everywhere except inside the respective contours, which are
        filled with 0.

    Contours with zero ``m00`` moment are skipped. A contour whose area is
    exactly ``max_column_area`` (or exactly ``min_column_area``) lands in
    neither group, as in the original comparisons.
    """
    mask_shape = img.shape[:2]
    mask_clmns = np.full(mask_shape, 255, dtype="uint8")
    mask_walls = np.full(mask_shape, 255, dtype="uint8")
    contours, _ = cv2.findContours(
        image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
    )
    p = []  # centroid of every contour classified as a column
    for cnt in contours:
        M = cv2.moments(cnt)
        if M['m00'] == 0.0:
            # No centroid can be computed for a degenerate contour.
            continue
        area = cv2.contourArea(cnt)
        if area > max_column_area:
            cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
        elif min_column_area < area < max_column_area:
            p.append((int(M['m10'] / M['m00']), int(M['m01'] / M['m00'])))
            cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
    return p, mask_clmns, mask_walls
def getTextsPoints(x):
    """Compute a midpoint for every word entry and map it to the word text.

    Args:
        x: Iterable of indexable entries; indices 0-3 are read as
            coordinates and index 4 as the text. Presumably the tuples
            returned by ``page.get_text("words")`` -- confirm.

    Returns:
        ``(point_list, pt_clm)`` where ``point_list`` holds one midpoint per
        entry (duplicates included) and ``pt_clm`` maps each midpoint to the
        text of the last entry that produced it.
    """
    point_list = []
    pt_clm = {}
    for word in x:
        # The coordinates are deliberately passed in swapped order
        # (index 1 before 0, index 3 before 2), exactly as before.
        midpoint = calculate_midpoint(word[1], word[0], word[3], word[2])
        point_list.append(midpoint)
        pt_clm[midpoint] = word[4]
    return point_list, pt_clm
def fix_90_ky_val(pt_clm, derotationMatrix):
    """Re-key a point-to-text mapping after applying a derotation matrix.

    Each key is turned into a ``fitz.Point``, multiplied by
    ``derotationMatrix`` and stored as ``(int(y), int(x))`` of the result.
    Values are carried over unchanged; if two keys map to the same new key
    the later one wins.
    """
    def _derotated_key(key):
        moved = fitz.Point(key[0], key[1]) * derotationMatrix
        return (int(moved.y), int(moved.x))

    return {_derotated_key(key): text for key, text in pt_clm.items()}
def calculate_midpoint(x1, y1, x2, y2):
    """Return the midpoint of (x1, y1) and (x2, y2) as a tuple of ints.

    Each coordinate is averaged with true division and then truncated
    towards zero by ``int``.
    """
    mid_x = int((x1 + x2) / 2)
    mid_y = int((y1 + y2) / 2)
    return mid_x, mid_y
def getColumnsTypesKeyValue(nearbyy, pt_clm):
    """Look up the text stored in ``pt_clm`` for every point in ``nearbyy``.

    Returns:
        A list with one entry per point, in the same order. A point that is
        not a key of ``pt_clm`` raises ``KeyError``.
    """
    return [pt_clm[point] for point in nearbyy]
def fix_rotation_90(pc_coordinates, derotationMatrix):
    """Apply a derotation matrix to a sequence of points.

    Each coordinate pair is turned into a ``fitz.Point``, multiplied by
    ``derotationMatrix`` and returned as ``(int(y), int(x))`` of the result,
    in input order.
    """
    def _derotate(coordinate):
        moved = fitz.Point(coordinate[0], coordinate[1]) * derotationMatrix
        return (int(moved.y), int(moved.x))

    return [_derotate(coordinate) for coordinate in pc_coordinates]
def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points."""
    (ax, ay), (bx, by) = point1, point2
    delta_x = ax - bx
    delta_y = ay - by
    return np.sqrt(delta_x ** 2 + delta_y ** 2)
def getNearestText(point_list, p, max_distance=44):
    """Pair each column point with its nearest text point, if close enough.

    Args:
        point_list: Candidate text points.
        p: Column points to match.
        max_distance: A column is kept only when its nearest text point is
            strictly closer than this. Defaults to the original value 44.

    Returns:
        ``(nearbyy, selected_clm_point)``: two lists of equal length holding
        the matched text points and the column points they were matched to.
        Both are empty when there are no text points or no column points.
    """
    nearbyy = []
    selected_clm_point = []  # column points that found a text, kept for drawing
    if not point_list:
        # min() raises ValueError on an empty sequence; with no text on the
        # page there is simply nothing to match.
        return nearbyy, selected_clm_point
    for column_point in p:
        nearest_point = min(
            point_list, key=lambda point: distance(point, column_point)
        )
        if distance(nearest_point, column_point) < max_distance:
            nearbyy.append(nearest_point)
            selected_clm_point.append(column_point)
    return nearbyy, selected_clm_point
def getColumnsTypes(nearbyy, x):
    """Collect the texts starting with "C" whose entry matches a given point.

    For every point in ``nearbyy`` (outer order) and every entry in ``x``
    (inner order), the entry's text at index 4 is collected when the entry's
    indices 2 and 3 equal the point's two coordinates and the text starts
    with ``"C"``.
    """
    return [
        entry[4]
        for point in nearbyy
        for entry in x
        if entry[2] == point[0]
        and entry[3] == point[1]
        and entry[4].startswith("C")
    ]
def generate_legend(found_tuple):
    """Count how often each word occurs and return the counts as a table.

    Returns:
        A DataFrame with the columns ``'Column Type'`` and ``'Count'``, one
        row per distinct word in order of first appearance.
    """
    occurrences = {}
    for word in found_tuple:
        occurrences[word] = occurrences.get(word, 0) + 1
    return pd.DataFrame(occurrences.items(), columns=['Column Type', 'Count'])
def add_annotations_to_pdf(image, pdf_name, slctd_clm, columns_types_v):
    """Add one red text annotation per selected column to page 0 of a PDF.

    Args:
        image: Not used; kept so existing callers keep working.
        pdf_name: Passed to ``fitz.open`` as the second positional argument,
            with ``'pdf'`` as the first.
        slctd_clm: Sequence of ``(x, y)`` column points.
        columns_types_v: Annotation text for each entry of ``slctd_clm``,
            looked up by the same index.

    Returns:
        The opened ``fitz`` document with the annotations added. It is left
        open for the caller.
    """
    pdf_document = fitz.open('pdf', pdf_name)
    page = pdf_document[0]
    rotationOld = page.rotation
    derotationMatrix = page.derotation_matrix
    # Annotations are placed while the page is unrotated; the points are
    # mapped through the derotation matrix captured above.
    if rotationOld != 0:
        page.set_rotation(0)
    for i, (x, y) in enumerate(slctd_clm):
        p_midpoint = fitz.Point(x, y) * derotationMatrix
        # Create an annotation (sticky note) carrying the column type.
        annot = page.add_text_annot(
            (p_midpoint.x, p_midpoint.y), columns_types_v[i]
        )
        annot.set_border(width=0.2, dashes=(1, 2))  # optional border styling
        annot.set_colors(stroke=(1, 0, 0), fill=None)  # red stroke
        annot.update()
    # Restore whatever rotation the page had on entry.
    page.set_rotation(rotationOld)
    return pdf_document
def mainfun(pdf_name, pdfpath, planname):
    """Detect columns on page 0 of a plan, label them and build the legend.

    Steps: read the page rotation, extract the words and their midpoints,
    render the page, detect column contours (brown segmentation first, blue
    segmentation when that yields 10 or fewer columns), match every column
    to its nearest text, annotate the PDF, export the legend through
    ``google_sheet_Legend.legendGoogleSheets`` and list the annotations.

    Args:
        pdf_name: PDF content passed on to ``fitz.open('pdf', ...)``,
            ``get_text_from_pdf``, ``convert2img`` and
            ``add_annotations_to_pdf``.
        pdfpath: Forwarded to ``legendGoogleSheets``.
        planname: Forwarded to ``legendGoogleSheets``.

    Returns:
        ``(annotatedimg, pdf_document, spreadsheet_url, list1, legend)``:
        the BGR rendering of the annotated page 0, the annotated document,
        the spreadsheet URL, a DataFrame with the columns ``content``,
        ``id``, ``subject`` and ``color`` describing the annotations, and
        the legend DataFrame with missing values replaced by ``' '``.
    """
    # Only the rotation data of page 0 is needed from this handle, so it is
    # closed right away instead of being left open until it is rebound.
    probe_document = fitz.open('pdf', pdf_name)
    try:
        first_page = probe_document[0]
        rotation = first_page.rotation
        derotationMatrix = first_page.derotation_matrix
    finally:
        probe_document.close()

    texts_from_pdf = get_text_from_pdf(pdf_name)
    text_points, txtpts_ky_vlu = getTextsPoints(texts_from_pdf)
    # Only a 90-degree rotation is compensated; other rotations are left as is.
    if rotation == 90:
        text_points = fix_rotation_90(text_points, derotationMatrix)
        txtpts_ky_vlu = fix_90_ky_val(txtpts_ky_vlu, derotationMatrix)

    img = convert2img(pdf_name)
    # First attempt: brown columns.
    column_points, _, _ = get_columns_info(threshold(segment_brown(img)), img)
    if len(column_points) <= 10:
        # Too few hits: retry assuming blue columns. changeGrayModify edits
        # img in place; afterwards only its shape is used.
        img_blue = changeGrayModify(img)
        column_points, _, _ = get_columns_info(
            threshold(segment_blue(img_blue)), img
        )

    # Matching and legend generation are identical for both colour schemes.
    nearby, slctd_clm = getNearestText(text_points, column_points)
    columns_types_v = getColumnsTypesKeyValue(nearby, txtpts_ky_vlu)
    legend = generate_legend(columns_types_v)

    pdf_document = add_annotations_to_pdf(img, pdf_name, slctd_clm, columns_types_v)
    pix = pdf_document[0].get_pixmap()  # render page to an image
    pl = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    annotatedimg = cv2.cvtColor(np.array(pl), cv2.COLOR_RGB2BGR)

    legend = legend.fillna(' ')
    # Five values are returned; only the spreadsheet URL is used here.
    _gc, _service, _sheet_id, spreadsheet_url, _name_paths = (
        google_sheet_Legend.legendGoogleSheets(legend, planname, pdfpath)
    )

    list1 = pd.DataFrame(columns=['content', 'id', 'subject', 'color'])
    for page in pdf_document:
        for annot in page.annots():
            annot_color = annot.colors
            if annot_color is None:
                continue
            stroke_color = annot_color.get('stroke')  # border color
            print('strokeee', stroke_color)
            # NOTE(review): the indentation of the original was lost in this
            # copy of the file. The row is assumed to be recorded for every
            # annotation that has a colour dict, whether or not a stroke
            # colour is set (the colour column is hard-coded either way) --
            # confirm against the upstream file.
            list1.loc[len(list1)] = [
                annot.info['content'],
                annot.info['id'],
                annot.info['subject'],
                [255, 0, 0],
            ]
    print('list1', list1)
    return annotatedimg, pdf_document, spreadsheet_url, list1, legend
'''def mainfun(plan):
texts_from_pdf = get_text_from_pdf(plan)
img = convert2img(plan)
imgResult = segment_brown(img)
outsu = threshold(imgResult)
column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
if len(column_points) > 10:
# BROWN COLUMNS
text_points = getTextsPoints(texts_from_pdf)
nearby = getNearestText(text_points, column_points)
if rotation != 0:
if rotation ==90:
nearby = fix_rotation_90(pc_coordinates)
columns_types = getColumnsTypes(nearby, texts_from_pdf)
legend = generate_legend(columns_types)
else:
# BLUE COLUMNS
img_blue = changeGrayModify(img)
imgResult = segment_blue(img_blue)
outsu = threshold(imgResult)
column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
text_points = getTextsPoints(texts_from_pdf)
nearby = getNearestText(text_points, column_points)
if rotation != 0:
if rotation ==90:
nearby = fix_rotation_90(pc_coordinates)
columns_types = getColumnsTypes(nearby, texts_from_pdf)
legend = generate_legend(columns_types)
return legend'''