MeasurementTesting

Sleeping

App Files Files Community

Marthee committed on Jan 6, 2025

Commit

e5cf808

verified ·

1 Parent(s): 49170fb

Create 2.1_Counting_Columns

Browse files

Files changed (1) hide show

2.1_Counting_Columns +167 -0

2.1_Counting_Columns ADDED Viewed

	@@ -0,0 +1,167 @@

+import cv2
+import numpy as np
+import pandas as pd
+import statistics
+from statistics import mode
+from PIL import Image
+import io
+import pypdfium2 as pdfium
+import fitz  # PyMuPDF
+import os
def get_text_from_pdf(input_pdf_path):
    """Extract the word tuples of a PDF plan with PyMuPDF.

    Args:
        input_pdf_path: Path of the PDF file to open.

    Returns:
        The list produced by ``page.get_text("words")`` for the last page of
        the document, or an empty list when the document has no pages.  For a
        single-page plan this is simply that page's words.
    """
    # The context manager closes the document; the handle used to be leaked.
    with fitz.open(input_pdf_path) as pdf_document:
        if pdf_document.page_count == 0:
            # Previously this path hit an UnboundLocalError on return.
            return []
        # Earlier pages used to be extracted and then overwritten, so only
        # the final page's words ever reached the caller; read it directly.
        # The former apply_redactions() call ran after extraction on a
        # document that is never saved, so it could not affect the result.
        last_page = pdf_document[pdf_document.page_count - 1]
        return last_page.get_text("words")
def convert2img(path):
    """Render the first page of a PDF and return it as a BGR image array.

    Args:
        path: Path of the PDF file, handed to ``pdfium.PdfDocument``.

    Returns:
        A numpy array obtained by rendering page 0 with pypdfium2's default
        settings, converting it to a PIL image and swapping the channel
        order with ``cv2.COLOR_RGB2BGR``.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    rendered = first_page.render()
    rgb_pixels = np.array(rendered.to_pil())
    # OpenCV routines downstream expect BGR channel order.
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
def changeWhiteColumns(img):
    """Return a copy of ``img`` with near-white pixels painted (255, 0, 0).

    A pixel is selected when its HSV value has H == 0, S == 0 and V in
    [250, 255].  The input image is left untouched.

    Args:
        img: BGR image array.

    Returns:
        A recoloured copy of ``img``.
    """
    recolored = img.copy()
    lower_white = np.array([0, 0, 250])
    upper_white = np.array([0, 0, 255])
    white_mask = cv2.inRange(
        cv2.cvtColor(recolored, cv2.COLOR_BGR2HSV), lower_white, upper_white
    )
    # (255, 0, 0) in BGR order, i.e. full intensity in the first channel.
    recolored[white_mask > 0] = (255, 0, 0)
    return recolored
def changeGrayModify(img):
    """Paint light-gray pixels of ``img`` with (255, 0, 0), in place.

    A pixel is selected when its HSV value has H == 0, S == 0 and V in
    [175, 199].

    Args:
        img: BGR image array.  NOTE: it is modified in place (no copy is
            made), so the caller's array changes as well.

    Returns:
        The same ``img`` object after recolouring.
    """
    lower_gray = np.array([0, 0, 175])
    upper_gray = np.array([0, 0, 199])
    gray_mask = cv2.inRange(
        cv2.cvtColor(img, cv2.COLOR_BGR2HSV), lower_gray, upper_gray
    )
    img[gray_mask > 0] = (255, 0, 0)
    return img
def segment_blue(gray_changed):
    """Keep only the fully saturated pixels in the blue hue band.

    Pixels whose HSV value lies in H [120, 179], S == 255, V == 255 are
    kept; every other pixel becomes black in the result.

    Args:
        gray_changed: BGR image array (in this file, the output of
            ``changeGrayModify``).

    Returns:
        A new image holding the input masked by the HSV range above.
    """
    hue_low = np.array([120, 255, 255])
    hue_high = np.array([179, 255, 255])
    blue_mask = cv2.inRange(
        cv2.cvtColor(gray_changed, cv2.COLOR_BGR2HSV), hue_low, hue_high
    )
    return cv2.bitwise_and(gray_changed, gray_changed, mask=blue_mask)
def segment_brown(img):
    """Keep only the pixels that fall in the "brown" HSV band.

    Pixels whose HSV value lies in H [0, 81], S [9, 255], V [0, 255] are
    kept; every other pixel becomes black in the result.

    Args:
        img: BGR image array.

    Returns:
        A new image holding ``img`` masked by the HSV range above.
    """
    hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    brown_mask = cv2.inRange(
        hsv_image, np.array([0, 9, 0]), np.array([81, 255, 255])
    )
    return cv2.bitwise_and(img, img, mask=brown_mask)
def threshold(imgResult3):
    """Binarise a segmented BGR image with Otsu's method.

    The image is smoothed with a 3x3 Gaussian kernel (sigma 9), converted
    to grayscale and thresholded with ``THRESH_BINARY + THRESH_OTSU``.

    Args:
        imgResult3: BGR image array, typically a colour-segmented plan.

    Returns:
        The binary image returned by ``cv2.threshold``.
    """
    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
    grayscale = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (chosen_threshold, image); only the image is used.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary
def get_columns_info(outsu4, img, max_column_area=881.0 * 2, min_column_area=90):
    """Classify external contours into columns and walls by area.

    Contours with an area above ``max_column_area`` are treated as walls;
    contours with an area strictly between ``min_column_area`` and
    ``max_column_area`` are treated as columns.  A contour whose area is
    exactly ``max_column_area``, or at most ``min_column_area``, is ignored.

    Args:
        outsu4: Binary image passed to ``cv2.findContours``.
        img: Image whose first two shape entries size the output masks.
        max_column_area: Upper area bound for a column (default 881.0 * 2,
            the value that used to be hard-coded).
        min_column_area: Lower area bound for a column (default 90).

    Returns:
        A tuple ``(p, mask_clmns, mask_walls)`` where ``p`` is the list of
        integer ``(x, y)`` centroids of the column contours, and the two
        masks are uint8 images initialised to 255 with the column / wall
        contours filled with 0.
    """
    mask_shape = img.shape[:2]
    mask_clmns = np.ones(mask_shape, dtype="uint8") * 255
    mask_walls = np.ones(mask_shape, dtype="uint8") * 255
    contours, _ = cv2.findContours(
        image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
    )
    p = []  # centroid of every contour classified as a column
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area > max_column_area:
            cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
        elif min_column_area < area < max_column_area:
            # Compute the centroid only where it is used, so a value left
            # over from a previous contour can never be appended.
            M = cv2.moments(cnt)
            if M['m00'] == 0.0:
                # Degenerate contour: no centroid can be computed.
                continue
            p.append((int(M['m10'] / M['m00']), int(M['m01'] / M['m00'])))
            cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
    return p, mask_clmns, mask_walls
def getTextsPoints(x):
    """Return the anchor point of every extracted word.

    Args:
        x: Iterable of indexable word records; entries 2 and 3 of each
            record are used (presumably x1/y1 of PyMuPDF's word tuples,
            i.e. the bottom-right corner of the word box - confirm).

    Returns:
        A list of ``(record[2], record[3])`` tuples, in input order.
    """
    return [(word[2], word[3]) for word in x]
def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        point1: ``(x, y)`` pair.
        point2: ``(x, y)`` pair.

    Returns:
        The straight-line distance, as computed by ``np.sqrt``.
    """
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return np.sqrt(dx ** 2 + dy ** 2)
def getNearestText(point_list, p, max_distance=44):
    """Find, for each column centroid, the closest text point nearby.

    Args:
        point_list: List of ``(x, y)`` text anchor points.
        p: List of ``(x, y)`` column centroids.
        max_distance: A text point is accepted only when it lies strictly
            closer than this to the centroid (default 44, the value that
            used to be hard-coded).

    Returns:
        A list with one entry per centroid that has a text point within
        ``max_distance``: the nearest such text point, in centroid order.
        Empty when there are no text points at all.
    """
    if not point_list:
        # A plan with no extractable text used to crash in min() with
        # ValueError; there is simply nothing to match.
        return []
    nearbyy = []
    for column_point in p:
        # Pair every candidate with its distance so it is computed once;
        # min() keeps the first candidate on ties, as before.
        nearest_point, dist = min(
            ((candidate, distance(candidate, column_point))
             for candidate in point_list),
            key=lambda pair: pair[1],
        )
        if dist < max_distance:
            nearbyy.append(nearest_point)
    return nearbyy
def getColumnsTypes(nearbyy, x):
    """Collect the column labels located at the given text points.

    For every point in ``nearbyy`` (in order), each word record in ``x``
    whose entries 2 and 3 equal the point and whose text (entry 4) starts
    with "C" contributes its text to the result.

    Args:
        nearbyy: List of ``(x, y)`` text points to look up.
        x: Iterable of word records with coordinates at indices 2 and 3
            and the word text at index 4.

    Returns:
        A list of matching label strings; a label appears once for every
        point/record pair that matches.
    """
    found_tuple = []
    for near_x, near_y in nearbyy:
        for word in x:
            if word[2] != near_x or word[3] != near_y:
                continue
            if word[4].startswith("C"):
                found_tuple.append(word[4])
    return found_tuple
def generate_legend(found_tuple):
    """Build a legend table counting how often each column label occurs.

    Args:
        found_tuple: Iterable of column label strings.

    Returns:
        A pandas DataFrame with the columns 'Column Type' and 'Count', one
        row per distinct label in order of first appearance.
    """
    occurrences = {}
    for label in found_tuple:
        occurrences[label] = occurrences.get(label, 0) + 1
    return pd.DataFrame(occurrences.items(), columns=['Column Type', 'Count'])
def mainfun(plan):
    """Count the labelled columns of a PDF plan and return a legend table.

    The plan's first page is rendered and segmented for brown columns
    first.  If that yields more than 10 column candidates they are used;
    otherwise the image is recoloured with ``changeGrayModify`` and
    segmented for blue columns instead.  Each column centroid is then
    matched to its nearest text label and the labels are tallied.

    Args:
        plan: Path of the PDF plan.

    Returns:
        The DataFrame produced by ``generate_legend``.
    """
    texts_from_pdf = get_text_from_pdf(plan)
    img = convert2img(plan)

    # First attempt: brown columns.
    brown_binary = threshold(segment_brown(img))
    column_points, _, _ = get_columns_info(brown_binary, img)

    if len(column_points) <= 10:
        # Too few brown candidates: fall back to blue columns.
        blue_binary = threshold(segment_blue(changeGrayModify(img)))
        column_points, _, _ = get_columns_info(blue_binary, img)

    # Label matching is the same whichever colour produced the points.
    text_points = getTextsPoints(texts_from_pdf)
    nearby = getNearestText(text_points, column_points)
    columns_types = getColumnsTypes(nearby, texts_from_pdf)
    return generate_legend(columns_types)