|
|
import cv2 |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import statistics |
|
|
from statistics import mode |
|
|
from PIL import Image |
|
|
import io |
|
|
import google_sheet_Legend |
|
|
import pypdfium2 as pdfium |
|
|
import fitz |
|
|
import os |
|
|
import random |
|
|
import uuid |
|
|
import math |
|
|
|
|
|
def convert2img(data):
    """Render the first page of a PDF to an OpenCV BGR image.

    Defect fixed: the original bound the result to an unused local
    ``img_cv2`` before returning.

    Parameters:
        data: PDF input accepted by ``pdfium.PdfDocument`` (path or bytes).

    Returns:
        numpy.ndarray: the first page rendered as a BGR image.
    """
    pdf = pdfium.PdfDocument(data)
    page = pdf.get_page(0)
    pil_image = page.render().to_pil()
    # PIL renders RGB; OpenCV convention is BGR, so convert once here.
    rgb_array = np.array(pil_image)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
|
|
|
|
|
def threshold(imgResult3):
    """Binarise a BGR image: box blur, grayscale, then Otsu thresholding.

    Returns the binary (0/255) image produced by Otsu's method.
    """
    smoothed = cv2.blur(imgResult3, (7, 7))
    grayscale = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (computed_threshold, image); only the image is needed.
    _, binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary
|
|
|
|
|
def get_columns_info(outsu4, img, wall_area=881.0 * 2, min_column_area=90):
    """Classify external contours of a binary image into walls and columns.

    Contours with area above ``wall_area`` are drawn onto the wall mask;
    contours with area strictly between ``min_column_area`` and
    ``wall_area`` are treated as columns: their centroids are collected
    and they are drawn onto the column mask.

    Defects fixed: the unused ``perimeter`` local and unused ``enumerate``
    index were removed; the duplicated magic thresholds are now parameters
    with defaults matching the original behavior exactly.

    Parameters:
        outsu4: binary (thresholded) single-channel image.
        img: reference image; only its height/width are used for the masks.
        wall_area: minimum contour area to count as a wall.
        min_column_area: minimum contour area to count as a column.

    Returns:
        (centroids, column_mask, wall_mask) where centroids is a list of
        (x, y) ints and each mask is white (255) with detections filled black.
    """
    mask_clmns = np.ones(img.shape[:2], dtype="uint8") * 255
    mask_walls = np.ones(img.shape[:2], dtype="uint8") * 255
    contours, hierarchy = cv2.findContours(image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE)
    p = []
    for cnt in contours:
        M = cv2.moments(cnt)
        # Zero-area contours have m00 == 0 and no defined centroid; skip them.
        if M['m00'] != 0.0:
            x1 = int(M['m10'] / M['m00'])
            y1 = int(M['m01'] / M['m00'])
            area = cv2.contourArea(cnt)
            if area > wall_area:
                # Wall-sized blob: fill it into the wall mask.
                cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
            elif min_column_area < area < wall_area:
                # Column-sized blob: record centroid and fill the column mask.
                p.append((x1, y1))
                cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
    return p, mask_clmns, mask_walls
|
|
|
|
|
def get_text_from_pdf(input_pdf_path):
    """Collect column-label words from every page of a PDF.

    A word qualifies when its text starts with "C" or "c" and is at most
    five characters long (e.g. "C1", "c12").

    Defects fixed: the original overwrote ``text_instances`` on every loop
    iteration, so only the LAST page's words were returned; it also called
    ``page.apply_redactions()`` without ever adding redaction annotations
    (a no-op), and left ``results``/``width``/``height`` unused.

    Parameters:
        input_pdf_path: PDF bytes/stream, opened via ``fitz.open("pdf", ...)``.

    Returns:
        list: fitz word tuples (x0, y0, x1, y1, text, block, line, word_no)
        accumulated across all pages.
    """
    pdf_document = fitz.open("pdf", input_pdf_path)
    results = []
    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]
        results.extend(
            word for word in page.get_text("words")
            if word[4].startswith(("C", "c")) and len(word[4]) <= 5
        )
    return results
|
|
|
|
|
def calculate_midpoint(x1, y1, x2, y2):
    """Return the midpoint of segment (x1, y1)-(x2, y2) as integer coords.

    Each coordinate is the true float midpoint truncated toward zero via
    ``int()``, matching Python's float-to-int conversion.
    """
    return (int((x1 + x2) / 2), int((y1 + y2) / 2))
|
|
|
|
|
|
|
|
def getTextsPoints(x, page):
    """Map each PDF word tuple to the rotated midpoint of its bounding box.

    Parameters:
        x: iterable of fitz word tuples (x0, y0, x1, y1, text, ...).
        page: fitz page; its rotation matrix is applied to each midpoint.

    Returns:
        (points, mapping): a list of ``fitz.Point`` midpoints, and a dict
        keyed by each point whose value is that word's text.
    """
    midpoints = []
    point_to_text = {}
    for word in x:
        cx, cy = calculate_midpoint(word[0], word[1], word[2], word[3])
        # Bring the midpoint into the page's rotated coordinate system.
        rotated = fitz.Point(cx, cy) * page.rotation_matrix
        midpoints.append(rotated)
        point_to_text[rotated] = word[4]
    return midpoints, point_to_text
|
|
|
|
|
def distance(point1, point2):
    """Euclidean distance between two 2-D points, via NumPy sqrt.

    NOTE(review): this function is re-defined later in the module using
    ``math.hypot``; at runtime the later definition shadows this one.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)
|
|
|
|
|
def getNearestText(point_list, p):
    """Pair every column centroid with its nearest text midpoint.

    Defects fixed: the original recomputed the winning distance after the
    ``min`` call and accumulated it in a ``dis`` list that was never
    returned or used — both removed.

    Parameters:
        point_list: non-empty list of text midpoints.
        p: list of column centroids.

    Returns:
        (nearest_points, selected_columns, pairs) — parallel lists where
        ``pairs[i] == (nearest_points[i], selected_columns[i])``.
    """
    nearbyy = []
    selected_clm_point = []
    txt_clmn = []
    for centroid in p:
        # min keeps the first minimal element, preserving tie behavior.
        nearest_point = min(point_list, key=lambda txt_pt: distance(txt_pt, centroid))
        nearbyy.append(nearest_point)
        selected_clm_point.append(centroid)
        txt_clmn.append((nearest_point, centroid))
    return nearbyy, selected_clm_point, txt_clmn
|
|
|
|
|
def color_groups(txtpts_ky_vlu):
    """Assign a random RGB color to each distinct label value.

    Defect fixed: removed the redundant function-local ``import random`` —
    the module already imports ``random`` at the top of the file.

    Parameters:
        txtpts_ky_vlu: mapping whose values are label strings (e.g. "C1").

    Returns:
        dict: {label: (r, g, b)} with each channel in 0..255. Colors are
        unseeded random values, so they differ between runs.
    """
    unique_labels = list(set(txtpts_ky_vlu.values()))

    def generate_rgb():
        return (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

    return {key: generate_rgb() for key in unique_labels}
|
|
|
|
|
def getColumnsTypesKeyValue(nearbyy, pt_clm):
    """Look up the label for each nearest text point, preserving order.

    Parameters:
        nearbyy: sequence of points (keys of ``pt_clm``).
        pt_clm: dict mapping point -> label text.

    Returns:
        list: labels in the same order as ``nearbyy``.
    """
    return [pt_clm[point] for point in nearbyy]
|
|
|
|
|
def generate_legend(found_tuple):
    """Build a legend DataFrame counting occurrences of each column type.

    Parameters:
        found_tuple: iterable of column-type strings (may repeat).

    Returns:
        pandas.DataFrame with columns ['Column Type', 'Count'], rows in
        first-seen order (dicts preserve insertion order).
    """
    word_freq = {}
    for word in found_tuple:
        # dict.get collapses the original if/else counting branch.
        word_freq[word] = word_freq.get(word, 0) + 1
    return pd.DataFrame(word_freq.items(), columns=['Column Type', 'Count'])
|
|
|
|
|
def get_drawing_info(txt_clmn, txtpts_ky_vlu, key_colors):
    """Bundle each (text, column) pair with its label and assigned color.

    Parameters:
        txt_clmn: list of (text_location, column_location) pairs.
        txtpts_ky_vlu: dict mapping text_location -> label.
        key_colors: dict mapping label -> RGB tuple.

    Returns:
        list of (text_location, column_location, label, color) tuples.
    """
    return [
        (txt_pt, col_pt, (label := txtpts_ky_vlu[txt_pt]), key_colors[label])
        for txt_pt, col_pt in txt_clmn
    ]
|
|
|
|
|
def get_columns_info2(outsu4, img):
    # Second-pass contour analysis: classifies external contours of the
    # thresholded segmentation into walls (area > 881) and columns
    # (90 < area < 1762), fills each group into its own mask, and records
    # centroid -> contour mappings for later lookup.
    # Returns: (p_column, p_wall, mask_clmns, mask_walls, wall_contour,
    #           all_points, wall_mid_and_full).
    mask_clmns = np.ones(img.shape[:2], dtype="uint8") * 255
    mask_walls = np.ones(img.shape[:2], dtype="uint8") * 255
    contours, hierarchy = cv2.findContours(image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_SIMPLE)
    p_column = []
    p_wall = []
    wall_contour = []
    all_points = []
    wall_mid_and_full = {}
    for i, cnt in enumerate(contours):
        M = cv2.moments(cnt)
        # Zero-area contours have m00 == 0 and no defined centroid; skip.
        if M['m00'] != 0.0:
            # Centroid from spatial moments.
            x1 = int(M['m10']/M['m00'])
            y1 = int(M['m01']/M['m00'])
            area = cv2.contourArea(cnt)
            if area > (881.0):
                # Wall-sized blob. NOTE(review): `perimeter` is computed
                # but never used.
                perimeter = cv2.arcLength(cnt,True)
                p_wall.append((x1,y1))
                cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
                wall_contour.append(cnt)
                all_points.append((x1,y1))
                wall_mid_and_full[(x1, y1)] = cnt
            # NOTE(review): the two branches overlap for 881 < area < 1762 —
            # such a contour is recorded as BOTH wall and column, its
            # centroid is appended to all_points twice, and its
            # wall_mid_and_full entry is overwritten below. Confirm whether
            # this is intended (get_columns_info uses 881*2 as the wall cutoff).
            if area < (881.0 * 2) and area > 90:
                all_points.append((x1,y1))
                p_column.append((x1,y1))
                cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
                wall_mid_and_full[(x1, y1)] = cnt
    return p_column, p_wall, mask_clmns, mask_walls, wall_contour, all_points, wall_mid_and_full
|
|
|
|
|
def get_all_wall_points(wall_contours):
    """Convert OpenCV contours into lists of plain (x, y) integer tuples.

    Parameters:
        wall_contours: iterable of contours, each an array of [[x, y]] rows
        (the shape cv2.findContours produces).

    Returns:
        list of lists of (x, y) tuples, one inner list per contour.
    """
    return [
        [(int(row[0][0]), int(row[0][1])) for row in contour]
        for contour in wall_contours
    ]
|
|
|
|
|
def get_text_wall_text(input_pdf_path):
    """Collect wall-label words from every page of a PDF.

    A word qualifies when its text starts with "w" or "W" and is at most
    five characters long (e.g. "W1", "w12").

    Defects fixed (same as get_text_from_pdf): ``text_instances`` was
    overwritten each page so only the LAST page's words were returned;
    ``page.apply_redactions()`` was a no-op without redaction annotations;
    ``results``/``width``/``height`` were unused.

    Parameters:
        input_pdf_path: PDF bytes/stream, opened via ``fitz.open("pdf", ...)``.

    Returns:
        list: fitz word tuples accumulated across all pages.
    """
    pdf_document = fitz.open("pdf", input_pdf_path)
    results = []
    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]
        results.extend(
            word for word in page.get_text("words")
            if word[4].startswith(("w", "W")) and len(word[4]) <= 5
        )
    return results
|
|
|
|
|
def distance(p1, p2):
    """Euclidean distance between two 2-D points via math.hypot.

    NOTE(review): this re-definition shadows the earlier NumPy-based
    ``distance`` in this module; all later callers use this version.
    """
    dx, dy = p1[0] - p2[0], p1[1] - p2[1]
    return math.hypot(dx, dy)
|
|
|
|
|
def assign_walls_to_texts(text_locations, wall_locations, threshold=55):
    """Pair each text midpoint with its nearest wall centroid within range.

    Defects fixed: an empty ``wall_locations`` made ``min`` raise
    ValueError — now returns empty results; the winning distance was
    computed twice per text (once inside ``min``, once after) — now each
    candidate is measured exactly once. The distance is inlined with
    ``math.hypot``, identical to the module-level ``distance`` helper.

    Parameters:
        text_locations: iterable of (x, y) text midpoints.
        wall_locations: list of (x, y) wall/column centroids.
        threshold: maximum accepted distance (default 55).

    Returns:
        (matched_texts, matched_walls, text_wall_pairs) — parallel lists;
        pairs only include texts whose nearest wall is closer than threshold.
    """
    matched_texts = []
    matched_walls = []
    text_wall_pairs = []
    if not wall_locations:
        return matched_texts, matched_walls, text_wall_pairs
    for text in text_locations:
        # key=dw[0] ensures min keeps the FIRST wall on distance ties,
        # matching the original min-over-points behavior.
        dist, nearest_wall = min(
            ((math.hypot(wall[0] - text[0], wall[1] - text[1]), wall) for wall in wall_locations),
            key=lambda dw: dw[0],
        )
        print(f"Text {text} -> Nearest wall {nearest_wall}, Distance: {dist:.2f}")
        if dist < threshold:
            matched_texts.append(text)
            matched_walls.append(nearest_wall)
            text_wall_pairs.append((text, nearest_wall))
    return matched_texts, matched_walls, text_wall_pairs
|
|
|
|
|
def mainfun(plan_path, segmented_img):
    # Pipeline entry point: renders the plan PDF, thresholds the provided
    # segmentation image, detects column/wall contours, matches PDF text
    # labels to the nearest detections, and builds a legend summary.
    #
    # Parameters:
    #   plan_path: PDF bytes/stream (passed to fitz.open("pdf", ...) and
    #              to convert2img, which uses pdfium).
    #   segmented_img: encoded image bytes (decoded via cv2.imdecode).
    #
    # Returns a JSON-serializable dict with the legend records, counts,
    # and a status flag.
    print("Main started")
    pdf_document = fitz.open("pdf", plan_path)
    # Only the first page of the plan is analyzed.
    page = pdf_document[0]
    img_cv2 = convert2img(plan_path)
    # NOTE(review): rotation and derotationMatrix are computed but never
    # used below — confirm whether they were meant to be applied.
    rotation = page.rotation
    derotationMatrix=page.derotation_matrix
    # Decode the caller-supplied segmentation image from raw bytes.
    nparr = np.frombuffer(segmented_img, np.uint8)
    segmented_img_cv2 = cv2.imdecode(nparr, cv2.IMREAD_UNCHANGED)
    outsu = threshold(segmented_img_cv2)
    # First pass: column centroids plus column/wall masks.
    column_points, mask_clmns, mask_walls = get_columns_info(outsu, img_cv2)
    # Column-label words ("C..."/"c...") extracted from the PDF text layer.
    texts_from_pdf = get_text_from_pdf(plan_path)
    text_points, txtpts_ky_vlu = getTextsPoints(texts_from_pdf, page)
    key_colors = color_groups(txtpts_ky_vlu)
    # Match each detected column centroid to its nearest text label.
    nearby, slct_clms, txt_clmn = getNearestText(text_points, column_points)
    columns_types_v = getColumnsTypesKeyValue(nearby, txtpts_ky_vlu)
    legend = generate_legend(columns_types_v)
    huge_list_clmn_clr_loc = get_drawing_info(txt_clmn, txtpts_ky_vlu, key_colors)
    # Second pass: walls and columns together; note this rebinds
    # mask_clmns/mask_walls from the first pass.
    column_midpoints, wall_midpoints, mask_clmns, mask_walls, wall_contours, all_points, midpoint_full_contour= get_columns_info2(outsu, img_cv2)
    wall_points = get_all_wall_points(wall_contours)
    # NOTE(review): wall_text is collected but not used below.
    wall_text = get_text_wall_text(plan_path)
    # Match text midpoints to the nearest detection within 90 px.
    _,slct_walls, txt_wall = assign_walls_to_texts(text_points, all_points, 90)
    all_walls = []
    for wll in slct_walls:
        # Recover the full contour for each matched centroid.
        all_walls.append(midpoint_full_contour[wll])
    selected_wall_contours = get_all_wall_points(all_walls)
    print("Main Khalaset")
    return {
        'legend': legend.to_dict(orient='records'),
        'num_columns_detected': len(column_points),
        'num_texts': len(text_points),
        'status': 'success'
    }
|
|
|