MeasurementTesting

Sleeping

App Files Files Community

Marthee committed on Sep 27, 2023

Commit

dd3a079

1 Parent(s): a7246a5

Upload tameem2_1.py

Browse files

Files changed (1) hide show

tameem2_1.py +139 -0

tameem2_1.py ADDED Viewed

	@@ -0,0 +1,139 @@

+# -*- coding: utf-8 -*-
+"""(Deployment)2.1 counting columns.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1R2CszBuVN-Rugu8CyGQzqsdFw11E3eHN
+## Libraries
+"""
+# from google.colab.patches import cv2_imshow
+import cv2
+import numpy as np
+import pandas as pd
+import statistics
+from statistics import mode
+from PIL import Image
+# pip install PyPDF2
+# pip install PyMuPDF
+# pip install PyMuPDF==1.19.0
+import io
+# !pip install pypdfium2
+import pypdfium2 as pdfium
+import fitz  # PyMuPDF
+import pandas as pd
+import pilecaps_adr
+"""# Functions"""
def get_text_from_pdf(input_pdf_path):
    """Extract the word entries of a plan PDF stored under ``dropbox_plans/2.1/``.

    Args:
        input_pdf_path: File name of the PDF, appended to ``dropbox_plans/2.1/``.

    Returns:
        The value of ``page.get_text("words")`` for the last page of the
        document, or an empty list when the document has no pages.
    """
    # Default for a page-less document: without it the name stays unbound and
    # the return statement fails with UnboundLocalError.
    text_instances = []
    # The context manager closes the document even if extraction raises.
    with fitz.open('dropbox_plans/2.1/' + input_pdf_path) as pdf_document:
        for page_num in range(pdf_document.page_count):
            page = pdf_document[page_num]
            # NOTE(review): each iteration overwrites the previous result, so
            # only the last page's words are returned, while convert2img
            # renders page 0 -- confirm that plans are always single-page.
            text_instances = page.get_text("words")
            # NOTE(review): no redaction annotations are added here and the
            # document is never saved, so this call appears to have no effect
            # on the returned words -- confirm before removing it.
            page.apply_redactions()
    return text_instances
def convert2img(path):
    """Render the first page of a plan PDF as a BGR image array.

    Args:
        path: File name of the PDF, appended to ``dropbox_plans/2.1/``.

    Returns:
        The array produced by converting the rendered page (via PIL and
        NumPy) from RGB to BGR channel order with OpenCV.
    """
    document = pdfium.PdfDocument('dropbox_plans/2.1/'+path)
    first_page = document.get_page(0)
    # Render with pypdfium2's default settings, then go through PIL to NumPy.
    rgb_pixels = np.array(first_page.render().to_pil())
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
def segment(img):
    """Black out every pixel whose HSV value lies outside a fixed range.

    Args:
        img: BGR image (it is converted with ``cv2.COLOR_BGR2HSV``).

    Returns:
        ``img`` AND-ed with itself under the mask of pixels whose HSV value
        lies between ``[0, 9, 0]`` and ``[81, 255, 255]`` inclusive.
    """
    hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    in_range_mask = cv2.inRange(
        hsv_image,
        np.array([0, 9, 0]),
        np.array([81, 255, 255]),
    )
    return cv2.bitwise_and(img, img, mask=in_range_mask)
def threshold(imgResult3):
    """Binarise a BGR image with a Gaussian blur followed by Otsu thresholding.

    Args:
        imgResult3: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The thresholded single-channel image returned by ``cv2.threshold``.
    """
    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
    grayscale = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (chosen_threshold, image); only the image is used.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary
+# Deleted the image drawing
def getColumnsPoints(outsu4):
    """Return the centroid of every external contour found in a binary image.

    Args:
        outsu4: Single-channel image handed to ``cv2.findContours``.

    Returns:
        A list of ``(x, y)`` integer centroids, one per contour whose
        zeroth moment (``m00``) is non-zero.
    """
    contours, _ = cv2.findContours(
        image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
    )
    centroids = []
    for cnt in contours:
        moments = cv2.moments(cnt)
        # A contour with m00 == 0 has no defined centroid. Skip it rather
        # than re-appending the previous contour's centroid (which inflated
        # the count) or failing with UnboundLocalError when it comes first.
        if moments['m00'] == 0.0:
            continue
        cx = int(moments['m10'] / moments['m00'])
        cy = int(moments['m01'] / moments['m00'])
        centroids.append((cx, cy))
    return centroids
def getTextsPoints(x):
    """Collect the ``(item[2], item[3])`` pair of every entry in ``x``.

    Args:
        x: Iterable of indexable entries -- presumably the word tuples from
            ``page.get_text("words")``, where indices 2 and 3 would be the
            bottom-right corner of the word box (confirm against caller).

    Returns:
        A list of 2-tuples, in the same order as ``x``.
    """
    return [(entry[2], entry[3]) for entry in x]
def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        point1: Pair ``(x, y)``.
        point2: Pair ``(x, y)``.

    Returns:
        ``np.sqrt`` of the sum of the squared coordinate differences.
    """
    (ax, ay), (bx, by) = point1, point2
    dx = ax - bx
    dy = ay - by
    return np.sqrt(dx ** 2 + dy ** 2)
def getNearestText(point_list, p, max_distance=44):
    """Find, for each point in ``p``, the closest point of ``point_list``.

    Args:
        point_list: Candidate ``(x, y)`` points to choose from.
        p: Query ``(x, y)`` points; one lookup is done per entry.
        max_distance: Exclusive upper bound on the distance for a match to be
            kept. Defaults to 44, the threshold previously hard-coded here.

    Returns:
        A list with the nearest candidate for every query point whose nearest
        candidate is closer than ``max_distance``, in query order. The same
        candidate appears once per query point that selects it.
    """
    # With no candidates nothing can match; min() would raise ValueError.
    if len(point_list) == 0:
        return []
    matches = []
    for query_point in p:
        nearest_point = min(
            point_list, key=lambda candidate: distance(candidate, query_point)
        )
        if distance(nearest_point, query_point) < max_distance:
            matches.append(nearest_point)
    return matches
def getColumnsTypes(nearbyy, x):
    """Look up the text attached to each matched point.

    Args:
        nearbyy: Sequence of points; only indices 0 and 1 of each are used.
        x: Sequence of indexable entries where indices 2 and 3 hold a point
            and index 4 holds the associated text.

    Returns:
        A list with, for every point of ``nearbyy`` in order, the index-4
        value of every entry of ``x`` whose ``(entry[2], entry[3])`` equals
        that point (entries kept in ``x`` order). Points without a matching
        entry contribute nothing.
    """
    if len(nearbyy) == 0:
        return []
    # Group the texts by point once, instead of rescanning x for every
    # matched point.
    texts_by_point = {}
    for entry in x:
        texts_by_point.setdefault((entry[2], entry[3]), []).append(entry[4])
    found = []
    for point in nearbyy:
        found.extend(texts_by_point.get((point[0], point[1]), ()))
    return found
def generate_legend(found_tuple):
    """Tabulate how often each value occurs in ``found_tuple``.

    Args:
        found_tuple: Iterable of hashable values (the column labels).

    Returns:
        A ``pandas.DataFrame`` with columns ``'Column Type'`` and ``'Count'``,
        one row per distinct value in first-seen order.
    """
    occurrences = {}
    for label in found_tuple:
        occurrences[label] = occurrences.get(label, 0) + 1
    return pd.DataFrame(occurrences.items(), columns=['Column Type', 'Count'])
def mainfun(plan, pathtoplan):
    """Run the column-counting pipeline on one plan and export the legend.

    The plan's words are read, its first page is rendered, segmented and
    thresholded, the contour centroids are matched to the nearest word, and
    the matched words are tallied into a legend that is handed to
    ``pilecaps_adr.legendGoogleSheets``.

    Args:
        plan: PDF file name, forwarded to ``get_text_from_pdf``,
            ``convert2img`` and as ``path`` to ``legendGoogleSheets``.
        pathtoplan: Forwarded as ``pdfpath`` to ``legendGoogleSheets``.

    Returns:
        The fourth of the five values returned by
        ``pilecaps_adr.legendGoogleSheets`` (presumably the spreadsheet URL --
        confirm against that module).
    """
    words = get_text_from_pdf(plan)
    binary = threshold(segment(convert2img(plan)))
    centroids = getColumnsPoints(binary)
    word_corners = getTextsPoints(words)
    matched_corners = getNearestText(word_corners, centroids)
    legend = generate_legend(getColumnsTypes(matched_corners, words))
    # Only the fourth returned value is needed; the others are discarded.
    _, _, _, spreadsheet_url, _ = pilecaps_adr.legendGoogleSheets(
        legend, path=plan, pdfpath=pathtoplan
    )
    return spreadsheet_url
+"""# Call"""