MeasurementTesting / Counting_Columns_2_1.py
Marthee's picture
Update Counting_Columns_2_1.py
86f6cb5 verified
raw
history blame
4.9 kB
import io
import os
import statistics
from collections import Counter
from statistics import mode

import cv2
import fitz # PyMuPDF
import numpy as np
import pandas as pd
import pypdfium2 as pdfium
from PIL import Image
def get_text_from_pdf(input_pdf_path):
    """Extract word-level text entries from a PDF using PyMuPDF.

    Args:
        input_pdf_path: Forwarded to ``fitz.open`` as its second positional
            argument, with ``'pdf'`` as the first. NOTE(review): in that
            position PyMuPDF expects in-memory PDF data rather than a
            filesystem path, despite the parameter name -- confirm against
            the caller.

    Returns:
        The result of ``page.get_text("words")`` for the last page of the
        document, or an empty list when the document has no pages.
    """
    # Start with an empty result so a zero-page document yields [] instead
    # of raising UnboundLocalError at the return statement.
    text_instances = []
    # The context manager closes the document once the words are extracted.
    with fitz.open('pdf', input_pdf_path) as pdf_document:
        for page_num in range(pdf_document.page_count):
            page = pdf_document[page_num]
            # Each iteration overwrites the previous page's words, so only
            # the last page's entries survive the loop.
            text_instances = page.get_text("words")
    return text_instances
def convert2img(path):
    """Render the first page of a PDF into an OpenCV-style BGR array.

    Args:
        path: Passed unchanged to ``pdfium.PdfDocument``.

    Returns:
        The rendered page 0 as a NumPy array, after an RGB -> BGR channel
        swap via ``cv2.cvtColor``.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    # Render to a PIL image, then expose its pixels as a NumPy array.
    rendered_pixels = np.array(first_page.render().to_pil())
    return cv2.cvtColor(rendered_pixels, cv2.COLOR_RGB2BGR)
def changeWhiteColumns(img):
    """Return a copy of ``img`` whose near-white pixels are set to (255, 0, 0).

    A pixel is selected when its HSV representation (converted with
    ``COLOR_BGR2HSV``) has H == 0, S == 0 and V between 250 and 255
    inclusive. The input array itself is not modified.

    Args:
        img: Image array, treated as BGR by the colour conversion.

    Returns:
        The recoloured copy.
    """
    recoloured = img.copy()
    hsv_image = cv2.cvtColor(recoloured, cv2.COLOR_BGR2HSV)
    lower_white = np.array([0, 0, 250])
    upper_white = np.array([0, 0, 255])
    white_mask = cv2.inRange(hsv_image, lower_white, upper_white)
    # Overwrite every selected pixel with (255, 0, 0).
    recoloured[white_mask > 0] = (255, 0, 0)
    return recoloured
def changeGrayModify(img):
    """Set the light-gray pixels of ``img`` to (255, 0, 0), in place.

    A pixel is selected when its HSV representation (converted with
    ``COLOR_BGR2HSV``) has H == 0, S == 0 and V between 175 and 199
    inclusive.

    Args:
        img: Image array, treated as BGR by the colour conversion. It is
            modified directly; no copy is made.

    Returns:
        The same ``img`` object that was passed in, after modification.
    """
    hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_gray = np.array([0, 0, 175])
    upper_gray = np.array([0, 0, 199])
    gray_mask = cv2.inRange(hsv_image, lower_gray, upper_gray)
    # Writes straight into the caller's array.
    img[gray_mask > 0] = (255, 0, 0)
    return img
def segment_blue(gray_changed):
    """Keep only the fully saturated, full-brightness pixels with hue 120-179.

    Args:
        gray_changed: Image array, treated as BGR by the colour conversion.

    Returns:
        ``gray_changed`` combined with itself through ``cv2.bitwise_and``
        under the HSV range mask, so pixels outside the range are not
        carried over.
    """
    hsv_image = cv2.cvtColor(gray_changed, cv2.COLOR_BGR2HSV)
    # S and V are pinned to 255; only the hue may vary (120..179).
    hue_floor = np.array([120, 255, 255])
    hue_ceiling = np.array([179, 255, 255])
    blue_mask = cv2.inRange(hsv_image, hue_floor, hue_ceiling)
    return cv2.bitwise_and(gray_changed, gray_changed, mask=blue_mask)
def segment_brown(img):
    """Keep only pixels whose HSV value lies in [0, 9, 0] .. [81, 255, 255].

    Args:
        img: Image array, treated as BGR by the colour conversion.

    Returns:
        ``img`` combined with itself through ``cv2.bitwise_and`` under the
        HSV range mask, so pixels outside the range are not carried over.
    """
    hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_bound = np.array([0, 9, 0])
    upper_bound = np.array([81, 255, 255])
    in_range_mask = cv2.inRange(hsv_image, lower_bound, upper_bound)
    return cv2.bitwise_and(img, img, mask=in_range_mask)
def threshold(imgResult3):
    """Blur, gray-scale and Otsu-binarise an image.

    Args:
        imgResult3: Image array, treated as BGR by the gray conversion.

    Returns:
        The binary image produced by ``cv2.threshold`` with
        ``THRESH_BINARY + THRESH_OTSU`` (maximum value 255).
    """
    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
    grayscale = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (computed threshold, image); only the image is used.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary
def get_columns_info(outsu4, img, min_column_area=90, wall_area_threshold=881.0 * 2):
    """Split the external contours of a binary image by area.

    Contours larger than ``wall_area_threshold`` are drawn (filled with 0)
    onto the second mask; contours with an area strictly between
    ``min_column_area`` and ``wall_area_threshold`` are drawn onto the first
    mask and their centroid is recorded.

    Args:
        outsu4: Binary image handed to ``cv2.findContours``.
        img: Image whose first two shape dimensions size the output masks.
        min_column_area: Exclusive lower area bound for the first mask.
        wall_area_threshold: Exclusive upper area bound for the first mask
            and exclusive lower bound for the second mask.

    Returns:
        A tuple ``(p, mask_clmns, mask_walls)``: ``p`` is the list of
        ``(x, y)`` integer centroids of the mid-sized contours, and both
        masks are ``uint8`` arrays initialised to 255 with the matching
        contours filled with 0.
    """
    mask_shape = img.shape[:2]
    mask_clmns = np.full(mask_shape, 255, dtype="uint8")
    mask_walls = np.full(mask_shape, 255, dtype="uint8")
    contours, _ = cv2.findContours(
        image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
    )
    p = []  # centroid of every contour accepted for the first mask
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area > wall_area_threshold:
            cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
        elif min_column_area < area < wall_area_threshold:
            # NOTE(review): an area exactly equal to wall_area_threshold
            # matches neither branch; kept as-is to preserve existing
            # results -- confirm whether that gap is intended.
            M = cv2.moments(cnt)
            if M['m00'] == 0.0:
                # No centroid can be computed; skip rather than reuse the
                # centroid of a previous contour.
                continue
            p.append((int(M['m10'] / M['m00']), int(M['m01'] / M['m00'])))
            cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
    return p, mask_clmns, mask_walls
def getTextsPoints(x):
    """Build a list of ``(entry[2], entry[3])`` pairs from ``x``.

    In ``mainfun`` the entries come from PyMuPDF's ``get_text("words")``;
    per PyMuPDF's documentation indices 2 and 3 of such an entry are the
    x1 / y1 coordinates of the word's bounding box.

    Args:
        x: Iterable of indexable entries with at least four items each.

    Returns:
        One ``(entry[2], entry[3])`` tuple per entry, in input order.
    """
    return [(entry[2], entry[3]) for entry in x]
def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        point1: Two-item sequence ``(x, y)``.
        point2: Two-item sequence ``(x, y)``.

    Returns:
        ``sqrt((x1 - x2)**2 + (y1 - y2)**2)`` as computed by ``np.sqrt``.
    """
    ax, ay = point1
    bx, by = point2
    delta_x = ax - bx
    delta_y = ay - by
    return np.sqrt(delta_x ** 2 + delta_y ** 2)
def getNearestText(point_list, p, max_distance=44):
    """Find, for each point in ``p``, the closest point of ``point_list``.

    Args:
        point_list: Candidate ``(x, y)`` points (text anchors in
            ``mainfun``).
        p: Query ``(x, y)`` points (column centroids in ``mainfun``).
        max_distance: A nearest candidate is kept only when its distance to
            the query point is strictly below this value.

    Returns:
        A list holding, for every query point that has a candidate closer
        than ``max_distance``, that nearest candidate (the original object
        from ``point_list``). The same candidate may appear several times.
        Returns an empty list when ``point_list`` is empty.
    """
    # min() raises ValueError on an empty sequence; with no candidates there
    # is simply nothing to match.
    if not point_list:
        return []

    nearbyy = []
    for query_point in p:
        nearest_point = min(
            point_list, key=lambda point: distance(point, query_point)
        )
        if distance(nearest_point, query_point) < max_distance:
            nearbyy.append(nearest_point)
    return nearbyy
def getColumnsTypes(nearbyy, x):
    """Collect the labels starting with "C" located at the given points.

    For every point in ``nearbyy`` (in order), every entry of ``x`` whose
    items 2 and 3 equal the point's items 0 and 1 contributes its item 4,
    provided that string starts with ``"C"``. There is no de-duplication:
    a point listed twice contributes its labels twice.

    Args:
        nearbyy: Sequence of ``(x, y)`` points.
        x: Sequence of entries where index 2 / 3 are coordinates and index 4
            is a string.

    Returns:
        The list of matching strings.
    """
    labels = []
    for anchor in nearbyy:
        labels.extend(
            entry[4]
            for entry in x
            if entry[2] == anchor[0]
            and entry[3] == anchor[1]
            and entry[4].startswith("C")
        )
    return labels
def generate_legend(found_tuple):
    """Tabulate how often each label occurs.

    Args:
        found_tuple: Iterable of hashable labels (strings in ``mainfun``).

    Returns:
        A ``pandas.DataFrame`` with the columns ``'Column Type'`` and
        ``'Count'``, one row per distinct label in first-seen order. The
        frame is empty (but keeps both columns) when there are no labels.
    """
    # Counter keeps insertion order, so rows appear in first-seen order.
    word_freq = Counter(found_tuple)
    return pd.DataFrame(list(word_freq.items()), columns=['Column Type', 'Count'])
def mainfun(plan):
    """Count labelled columns on a plan and return them as a legend table.

    The plan is first segmented with ``segment_brown``. If that yields more
    than 10 column centroids they are used; otherwise the image is
    re-processed with ``changeGrayModify`` + ``segment_blue`` and those
    centroids are used instead. The centroids are then matched to the
    nearest text entries and the matching labels are tallied.

    Args:
        plan: Passed unchanged to both ``get_text_from_pdf`` and
            ``convert2img``.

    Returns:
        The DataFrame produced by ``generate_legend``.
    """
    texts_from_pdf = get_text_from_pdf(plan)
    img = convert2img(plan)

    # First attempt: brown columns.
    brown_binary = threshold(segment_brown(img))
    column_points, _, _ = get_columns_info(brown_binary, img)

    if len(column_points) <= 10:
        # Too few brown candidates: fall back to blue columns.
        blue_binary = threshold(segment_blue(changeGrayModify(img)))
        column_points, _, _ = get_columns_info(blue_binary, img)

    # Text matching is the same whichever segmentation supplied the points.
    text_points = getTextsPoints(texts_from_pdf)
    nearby = getNearestText(text_points, column_points)
    columns_types = getColumnsTypes(nearby, texts_from_pdf)
    return generate_legend(columns_types)