Marthee committed on
Commit
722cf6b
·
verified ·
1 Parent(s): 1bf15b7

Create old_Counting_Columns_2_1.py

Browse files
Files changed (1) hide show
  1. old_Counting_Columns_2_1.py +334 -0
old_Counting_Columns_2_1.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import pandas as pd
4
+ import statistics
5
+ from statistics import mode
6
+ from PIL import Image
7
+ import io
8
+ import google_sheet_Legend
9
+ import pypdfium2 as pdfium
10
+ import fitz # PyMuPDF
11
+ import os
12
+ import random
13
+
14
def get_text_from_pdf(input_pdf_path):
    """Return the word tuples extracted from a PDF.

    Args:
        input_pdf_path: Passed to ``fitz.open`` as the second positional
            argument next to the ``'pdf'`` type hint, i.e. as the document
            stream. Presumably raw PDF bytes despite the name -- TODO confirm
            against the callers.

    Returns:
        The result of ``page.get_text("words")`` for the last page of the
        document (each later page overwrites the earlier result), or an
        empty list when the document has no pages.
    """
    # NOTE(review): only the last page's words survive the loop, while the
    # rest of this module works on page 0. Identical for single-page plans;
    # confirm the intent for multi-page input.
    text_instances = []
    # The extracted words are plain tuples, so the document can be closed
    # as soon as the loop finishes.
    with fitz.open('pdf', input_pdf_path) as pdf_document:
        for page in pdf_document:
            text_instances = page.get_text("words")
    return text_instances
23
+
24
def convert2img(path):
    """Render the first page of a PDF and return it as a BGR image array.

    Args:
        path: Whatever ``pdfium.PdfDocument`` accepts as its input.

    Returns:
        A NumPy array produced by rendering page 0 with pypdfium2's default
        render settings, converted from RGB to BGR channel order for OpenCV.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    rgb_pixels = np.array(first_page.render().to_pil())
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
31
+
32
def changeWhiteColumns(img):
    """Return a copy of ``img`` with its near-white pixels painted (255, 0, 0).

    A pixel is selected when its HSV value (converted with
    ``COLOR_BGR2HSV``) has hue 0, saturation 0 and value in 250..255.
    The input image itself is left unmodified.

    Args:
        img: Image array treated as BGR.

    Returns:
        The recoloured copy.
    """
    recolored = img.copy()
    hsv_pixels = cv2.cvtColor(recolored, cv2.COLOR_BGR2HSV)
    lower_white = np.array([0, 0, 250])
    upper_white = np.array([0, 0, 255])
    white_mask = cv2.inRange(hsv_pixels, lower_white, upper_white)
    # inRange yields 255 for hits and 0 otherwise.
    recolored[white_mask > 0] = (255, 0, 0)
    return recolored
40
+
41
def changeGrayModify(img):
    """Paint the light-gray pixels of ``img`` with (255, 0, 0), in place.

    A pixel is selected when its HSV value (converted with
    ``COLOR_BGR2HSV``) has hue 0, saturation 0 and value in 175..199.

    Args:
        img: Image array treated as BGR. It is modified in place.

    Returns:
        The same ``img`` object that was passed in.
    """
    hsv_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_gray = np.array([0, 0, 175])
    upper_gray = np.array([0, 0, 199])
    gray_mask = cv2.inRange(hsv_pixels, lower_gray, upper_gray)
    # Unlike changeWhiteColumns, no copy is taken: the caller's array changes.
    img[gray_mask > 0] = (255, 0, 0)
    return img
50
+
51
def segment_blue(gray_changed):
    """Keep only the fully saturated, full-brightness pixels with hue 120..179.

    Args:
        gray_changed: Image array treated as BGR.

    Returns:
        A new image in which pixels outside the HSV range
        [120, 255, 255]..[179, 255, 255] are zeroed and the rest are copied
        from ``gray_changed``.
    """
    hsv_pixels = cv2.cvtColor(gray_changed, cv2.COLOR_BGR2HSV)
    lower_bound = np.array([120, 255, 255])
    upper_bound = np.array([179, 255, 255])
    selection = cv2.inRange(hsv_pixels, lower_bound, upper_bound)
    return cv2.bitwise_and(gray_changed, gray_changed, mask=selection)
60
+
61
def segment_brown(img):
    """Keep only the pixels whose HSV value lies in [0, 9, 0]..[81, 255, 255].

    Args:
        img: Image array treated as BGR.

    Returns:
        A new image in which pixels outside that HSV range are zeroed and
        the rest are copied from ``img``.
    """
    hsv_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_bound = np.array([0, 9, 0])
    upper_bound = np.array([81, 255, 255])
    selection = cv2.inRange(hsv_pixels, lower_bound, upper_bound)
    return cv2.bitwise_and(img, img, mask=selection)
68
+
69
def threshold(imgResult3):
    """Blur, grayscale and Otsu-binarise a BGR image.

    Args:
        imgResult3: Image array treated as BGR.

    Returns:
        The binary image returned by ``cv2.threshold`` with
        ``THRESH_BINARY + THRESH_OTSU`` (maximum value 255).
    """
    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
    grayscale = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    # With THRESH_OTSU the explicit threshold (0) is ignored; the computed
    # threshold is the first returned item and is not needed here.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary
74
+
75
def get_columns_info(outsu4, img):
    """Split the external contours of a binary image into columns and walls.

    Contours are classified by area:

    * area > 1762 (``881.0 * 2``): drawn filled in black on the wall mask;
    * 90 < area < 1762: drawn filled in black on the column mask and its
      centroid is recorded.

    Contours with an area of exactly 1762, or of 90 and below, are ignored,
    as in the original implementation.

    Args:
        outsu4: Binary image handed to ``cv2.findContours``.
        img: Image whose first two shape dimensions size the masks.

    Returns:
        A tuple ``(p, mask_clmns, mask_walls)`` where ``p`` is the list of
        ``(x, y)`` integer centroids of the column contours and both masks
        are ``uint8`` arrays initialised to 255.
    """
    wall_area_limit = 881.0 * 2
    column_min_area = 90

    mask_shape = img.shape[:2]
    mask_clmns = np.ones(mask_shape, dtype="uint8") * 255
    mask_walls = np.ones(mask_shape, dtype="uint8") * 255

    contours, _ = cv2.findContours(
        image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
    )

    p = []  # centroid of every contour classified as a column
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area > wall_area_limit:
            cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
        elif column_min_area < area < wall_area_limit:
            moments = cv2.moments(cnt)
            if moments['m00'] == 0.0:
                # No centroid can be computed; previously this path would
                # have reused the previous contour's centroid (or failed).
                continue
            centroid = (
                int(moments['m10'] / moments['m00']),
                int(moments['m01'] / moments['m00']),
            )
            p.append(centroid)
            cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
    return p, mask_clmns, mask_walls
98
+
99
def getTextsPoints(x):
    """Compute a midpoint for every word entry and map it to the word text.

    Each entry is indexed as ``entry[0..4]``; the midpoint is
    ``calculate_midpoint(entry[1], entry[0], entry[3], entry[2])``, so its
    first component comes from items 1 and 3 and its second from items 0
    and 2. For PyMuPDF ``get_text("words")`` tuples that is
    ``(y_mid, x_mid)``.

    Args:
        x: Iterable of word entries whose item 4 is the word text.

    Returns:
        A tuple ``(point_list, pt_clm)``: the midpoints in input order, and
        a dict from midpoint to text (a later entry with the same midpoint
        replaces an earlier one).
    """
    point_list = []
    pt_clm = {}
    for entry in x:
        midpoint = calculate_midpoint(entry[1], entry[0], entry[3], entry[2])
        point_list.append(midpoint)
        pt_clm[midpoint] = entry[4]
    return point_list, pt_clm
106
+
107
def fix_90_ky_val(pt_clm, derotationMatrix):
    """Re-key a point-to-text dict after applying a derotation matrix.

    Every key is turned into a ``fitz.Point``, multiplied by
    ``derotationMatrix`` and stored back as ``(int(y), int(x))`` -- note the
    swapped component order.

    Args:
        pt_clm: Dict keyed by 2-item points.
        derotationMatrix: Matrix the points are multiplied with.

    Returns:
        A new dict with the transformed keys and the original values.
    """
    new_derotated = {}
    for key, label in pt_clm.items():
        moved = fitz.Point(key[0], key[1]) * derotationMatrix
        new_derotated[(int(moved.y), int(moved.x))] = label
    return new_derotated
114
+
115
def calculate_midpoint(x1, y1, x2, y2):
    """Return the midpoint of two points as a tuple of ints.

    Each coordinate is averaged with true division and then truncated
    towards zero by ``int``.
    """
    mid_x = int((x1 + x2) / 2)
    mid_y = int((y1 + y2) / 2)
    return mid_x, mid_y
119
+
120
def getColumnsTypesKeyValue(nearbyy, pt_clm):
    """Look up the text stored for each point.

    Args:
        nearbyy: Sequence of points used as keys.
        pt_clm: Dict mapping points to text.

    Returns:
        The list of ``pt_clm`` values, one per point, in input order.

    Raises:
        KeyError: If a point is missing from ``pt_clm``.
    """
    return [pt_clm[point] for point in nearbyy]
125
+
126
def fix_rotation_90(pc_coordinates, derotationMatrix):
    """Apply a derotation matrix to a sequence of points.

    Every coordinate is turned into a ``fitz.Point``, multiplied by
    ``derotationMatrix`` and returned as ``(int(y), int(x))`` -- note the
    swapped component order.

    Args:
        pc_coordinates: Iterable of 2-item points.
        derotationMatrix: Matrix the points are multiplied with.

    Returns:
        The list of transformed points, in input order.
    """
    derotated = (
        fitz.Point(coordinate[0], coordinate[1]) * derotationMatrix
        for coordinate in pc_coordinates
    )
    return [(int(point.y), int(point.x)) for point in derotated]
132
+
133
def distance(point1, point2):
    """Return the Euclidean distance between two 2-item points."""
    (ax, ay), (bx, by) = point1, point2
    squared_length = (ax - bx) ** 2 + (ay - by) ** 2
    return np.sqrt(squared_length)
137
+
138
def getNearestText(point_list, p, max_distance=44):
    """Pair every column point with its nearest text point, if close enough.

    Args:
        point_list: Sequence of text points.
        p: Iterable of column points.
        max_distance: A column is kept only when its nearest text point is
            strictly closer than this. Defaults to 44, the value that was
            previously hard-coded.

    Returns:
        A tuple ``(nearbyy, selected_clm_point, txt_clmn)``:

        * ``nearbyy``: the matched text points;
        * ``selected_clm_point``: the matched column points;
        * ``txt_clmn``: ``(text_point, column_point)`` pairs.

        All three lists are empty when ``point_list`` is empty.
    """
    nearbyy = []
    selected_clm_point = []  # columns that get a circle drawn on them
    txt_clmn = []

    # min() raises ValueError on an empty sequence; a PDF without any
    # extractable text simply produces no matches.
    if len(point_list) == 0:
        return nearbyy, selected_clm_point, txt_clmn

    for column_point in p:
        nearest_point = min(
            point_list, key=lambda point: distance(point, column_point)
        )
        if distance(nearest_point, column_point) < max_distance:
            nearbyy.append(nearest_point)
            selected_clm_point.append(column_point)
            txt_clmn.append((nearest_point, column_point))
    return nearbyy, selected_clm_point, txt_clmn
152
+
153
+
154
def getColumnsTypes(nearbyy, x):
    """Collect the "C..." texts whose items 2 and 3 match a given point.

    For every point in ``nearbyy`` (in order) and every entry in ``x`` (in
    order), the entry's item 4 is collected when ``entry[2] == point[0]``,
    ``entry[3] == point[1]`` and the text starts with ``"C"``.

    Args:
        nearbyy: Sequence of 2-item points.
        x: Sequence of entries indexed as ``entry[2]``, ``entry[3]`` and
            ``entry[4]`` (a string).

    Returns:
        The list of matching texts.
    """
    return [
        entry[4]
        for target in nearbyy
        for entry in x
        if entry[2] == target[0]
        and entry[3] == target[1]
        and entry[4].startswith("C")
    ]
162
+
163
def generate_legend(found_tuple):
    """Build a legend table counting how often each column type occurs.

    Args:
        found_tuple: Iterable of hashable column-type labels.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'Column Type'`` and
        ``'Count'``, one row per distinct label in first-seen order. The
        frame has no rows when ``found_tuple`` is empty.
    """
    from collections import Counter

    word_freq = Counter(found_tuple)
    # Materialise the items so DataFrame receives a plain list of pairs
    # rather than a dict view.
    return pd.DataFrame(list(word_freq.items()), columns=['Column Type', 'Count'])
173
+
174
def color_groups(txtpts_ky_vlu):
    """Assign a random RGB colour to every distinct value of a dict.

    Args:
        txtpts_ky_vlu: Dict whose (hashable) values are the labels to colour.

    Returns:
        A dict mapping each distinct label to an ``(r, g, b)`` tuple of ints
        in 0..255 drawn with ``random.randint``. Colours are drawn
        independently, so two labels may receive the same colour.
    """
    key_colors = {}
    for label in set(txtpts_ky_vlu.values()):
        red = random.randint(0, 255)
        green = random.randint(0, 255)
        blue = random.randint(0, 255)
        key_colors[label] = (red, green, blue)
    return key_colors
180
+
181
def get_drawing_info(txt_clmn, txtpts_ky_vlu, key_colors):
    """Attach the word and its colour to every (text, column) location pair.

    Args:
        txt_clmn: Iterable of ``(text_location, column_location)`` pairs.
        txtpts_ky_vlu: Dict mapping a text location to its word.
        key_colors: Dict mapping a word to its colour.

    Returns:
        A list of ``(text_location, column_location, word, color)`` tuples
        in input order.

    Raises:
        KeyError: If a text location or a word is missing from its dict.
    """
    return [
        (
            text_location,
            column_location,
            txtpts_ky_vlu[text_location],
            key_colors[txtpts_ky_vlu[text_location]],
        )
        for text_location, column_location in txt_clmn
    ]
188
+ '''def add_annotations_to_pdf(image, pdf_name, slctd_clm, columns_types_v):
189
+ image_width = image.shape[1]
190
+ image_height = image.shape[0]
191
+ # Create a new PDF document
192
+ pdf_document = fitz.open('pdf',pdf_name)
193
+ page=pdf_document[0]
194
+ rotationOld=page.rotation
195
+ derotationMatrix=page.derotation_matrix
196
+ if page.rotation!=0:
197
+ rotationangle = page.rotation
198
+ page.set_rotation(0)
199
+ for i in range(len(slctd_clm)):
200
+ x, y = slctd_clm[i]
201
+ p_midpoint = fitz.Point(x, y) * derotationMatrix
202
+ text = columns_types_v[i]
203
+ # Create an annotation (sticky note)
204
+ annot = page.add_text_annot((p_midpoint.x, p_midpoint.y), text)
205
+ annot.set_border(width=0.2, dashes=(1, 2)) # Optional border styling
206
+ annot.set_colors(stroke=(1, 0, 0), fill=None) # Set the stroke color to red
207
+ annot.update()
208
+ page.set_rotation(rotationOld)
209
+ return pdf_document'''
210
+
211
def add_annotations_to_pdf(image, pdf_name, huge_list_clmn_clr_loc):
    """Draw one circle annotation per detected column on page 0 of the PDF.

    Args:
        image: Unused; kept so existing callers keep working.
        pdf_name: Passed to ``fitz.open`` as the document stream next to
            the ``'pdf'`` type hint (presumably raw PDF bytes -- TODO
            confirm against the callers).
        huge_list_clmn_clr_loc: Iterable of
            ``(text_location, column_location, word, color)`` tuples where
            ``color`` holds three 0..255 channel values.

    Returns:
        The opened ``fitz`` document with the annotations added. The caller
        owns it and is responsible for saving/closing it.
    """
    pdf_document = fitz.open('pdf', pdf_name)
    page = pdf_document[0]
    rotationOld = page.rotation
    # Read the matrix before resetting the rotation: it maps the rotated
    # (rendered) coordinates back to the unrotated page.
    derotationMatrix = page.derotation_matrix
    if rotationOld != 0:
        page.set_rotation(0)

    for _text_loc, column_loc, word, clr in huge_list_clmn_clr_loc:
        x, y = column_loc
        stroke = (clr[0] / 255, clr[1] / 255, clr[2] / 255)  # 0..1 floats
        center = fitz.Point(x, y) * derotationMatrix
        annot = page.add_circle_annot(
            fitz.Rect(center.x - 10, center.y - 10, center.x + 10, center.y + 10)
        )
        annot.set_colors(stroke=stroke, fill=(1, 1, 1))  # white fill
        annot.set_border(width=2)
        annot.set_opacity(1)
        # set_info takes (info, content, title, ...) positionally, so the
        # former set_info("subject", "Count") style calls only overwrote
        # the content and never set subject/title. Use keywords instead;
        # the content still ends up being the word, as before.
        annot.set_info(content=word, title=word, subject="Count")
        annot.update()

    page.set_rotation(rotationOld)
    return pdf_document
243
+
244
def mainfun(pdf_name, pdfpath, planname):
    """Detect columns on page 0 of a plan, annotate them and build a legend.

    The page is rendered, "brown" regions are segmented and split into
    column candidates. When 10 or fewer candidates are found, the detection
    is repeated on the "blue" segmentation of the gray-recoloured image.
    Each column is then paired with its nearest text, circle annotations
    are added to the PDF and the legend is sent to
    ``google_sheet_Legend.legendGoogleSheets``.

    Args:
        pdf_name: PDF input handed to ``fitz.open`` (as stream) and to
            ``convert2img`` (presumably raw PDF bytes -- TODO confirm).
        pdfpath: Forwarded unchanged to ``legendGoogleSheets``.
        planname: Forwarded unchanged to ``legendGoogleSheets``.

    Returns:
        A tuple ``(annotatedimg, pdf_document, spreadsheet_url, list1,
        legend)``: the BGR rendering of the annotated page 0, the annotated
        ``fitz`` document, the spreadsheet URL, a DataFrame with one row per
        annotation (``content``, ``id``, ``subject``, ``color``) and the
        legend DataFrame with NaNs replaced by ``' '``.
    """
    # Only the rotation data is needed from this handle, so release it
    # immediately instead of leaking it.
    with fitz.open('pdf', pdf_name) as source_document:
        first_page = source_document[0]
        rotation = first_page.rotation
        derotationMatrix = first_page.derotation_matrix

    texts_from_pdf = get_text_from_pdf(pdf_name)
    text_points, txtpts_ky_vlu = getTextsPoints(texts_from_pdf)
    # Only a 90 degree page rotation is compensated; other non-zero
    # rotations are left as they are.
    if rotation == 90:
        text_points = fix_rotation_90(text_points, derotationMatrix)
        txtpts_ky_vlu = fix_90_ky_val(txtpts_ky_vlu, derotationMatrix)

    img = convert2img(pdf_name)
    outsu = threshold(segment_brown(img))
    column_points, _, _ = get_columns_info(outsu, img)
    key_colors = color_groups(txtpts_ky_vlu)

    if len(column_points) <= 10:
        # Too few brown columns: fall back to the blue columns.
        # changeGrayModify recolours ``img`` in place.
        img_blue = changeGrayModify(img)
        outsu = threshold(segment_blue(img_blue))
        column_points, _, _ = get_columns_info(outsu, img)

    nearby, _, txt_clmn = getNearestText(text_points, column_points)
    columns_types_v = getColumnsTypesKeyValue(nearby, txtpts_ky_vlu)
    legend = generate_legend(columns_types_v)
    huge_list_clmn_clr_loc = get_drawing_info(txt_clmn, txtpts_ky_vlu, key_colors)

    pdf_document = add_annotations_to_pdf(img, pdf_name, huge_list_clmn_clr_loc)
    pix = pdf_document[0].get_pixmap()  # render the annotated page
    pl = Image.frombytes('RGB', (pix.width, pix.height), pix.samples)
    annotatedimg = cv2.cvtColor(np.array(pl), cv2.COLOR_RGB2BGR)

    legend = legend.fillna(' ')
    # Five values are returned; only the URL is used here.
    _, _, _, spreadsheet_url, _ = google_sheet_Legend.legendGoogleSheets(
        legend, planname, pdfpath
    )

    list1 = pd.DataFrame(columns=['content', 'id', 'subject', 'color'])
    for page in pdf_document:
        for annot in page.annots():
            annot_color = annot.colors
            if annot_color is not None:
                stroke_color = annot_color.get('stroke')  # border colour
                print('strokeee', stroke_color)
                # NOTE(review): the original nesting of this row append was
                # not recoverable; it is assumed to run for every annotation
                # that has a colour dict, not only for stroked ones. Confirm.
                # The colour column is the fixed value [255, 0, 0].
                list1.loc[len(list1)] = [
                    annot.info['content'],
                    annot.info['id'],
                    annot.info['subject'],
                    [255, 0, 0],
                ]

    print('list1', list1)
    return annotatedimg, pdf_document, spreadsheet_url, list1, legend
303
+
304
+ '''def mainfun(plan):
305
+ texts_from_pdf = get_text_from_pdf(plan)
306
+ img = convert2img(plan)
307
+ imgResult = segment_brown(img)
308
+ outsu = threshold(imgResult)
309
+ column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
310
+ if len(column_points) > 10:
311
+ # BROWN COLUMNS
312
+ text_points = getTextsPoints(texts_from_pdf)
313
+ nearby = getNearestText(text_points, column_points)
314
+ if rotation != 0:
315
+ if rotation ==90:
316
+ nearby = fix_rotation_90(pc_coordinates)
317
+ columns_types = getColumnsTypes(nearby, texts_from_pdf)
318
+ legend = generate_legend(columns_types)
319
+ else:
320
+ # BLUE COLUMNS
321
+ img_blue = changeGrayModify(img)
322
+ imgResult = segment_blue(img_blue)
323
+ outsu = threshold(imgResult)
324
+ column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
325
+ text_points = getTextsPoints(texts_from_pdf)
326
+ nearby = getNearestText(text_points, column_points)
327
+ if rotation != 0:
328
+ if rotation ==90:
329
+ nearby = fix_rotation_90(pc_coordinates)
330
+ columns_types = getColumnsTypes(nearby, texts_from_pdf)
331
+ legend = generate_legend(columns_types)
332
+ return legend'''
333
+
334
+