Marthee commited on
Commit
3690e7c
·
verified ·
1 Parent(s): 3a0bcad

Create old_Doors_schedule.py

Browse files
Files changed (1) hide show
  1. old_Doors_schedule.py +1288 -0
old_Doors_schedule.py ADDED
@@ -0,0 +1,1288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+ import pandas as pd
3
+ import random
4
+ import re
5
+ import io
6
+ import pypdfium2 as pdfium
7
+ import fitz
8
+ from PIL import Image, ImageDraw
9
+ from PyPDF2 import PdfReader, PdfWriter
10
+ from PyPDF2.generic import TextStringObject, NameObject, ArrayObject, FloatObject
11
+ from PyPDF2.generic import NameObject, TextStringObject, DictionaryObject, FloatObject, ArrayObject
12
+ from PyPDF2 import PdfReader
13
+ from PyPDF2.generic import TextStringObject
14
+ import numpy as np
15
+ import cv2
16
+ from collections import defaultdict
17
+ import random
18
+ import fitz # PyMuPDF
19
+ import PyPDF2
20
+ import io
21
+ from PyPDF2.generic import TextStringObject # ✅ Required for setting string values
22
+ from PyPDF2 import PdfReader, PdfWriter
23
+
24
+
25
def convert2img(path):
    """Render the first page of a PDF as an OpenCV-style BGR image array.

    Args:
        path: Input handed unchanged to ``pdfium.PdfDocument``.

    Returns:
        The rendered first page as a numpy array in BGR channel order.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    rgb_pixels = np.array(first_page.render().to_pil())
    # PIL delivers RGB; cv2 works with BGR, so swap the channel order.
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
32
+
33
def convert2pillow(path):
    """Render the first page of a PDF and return it as a PIL image.

    Args:
        path: Input handed unchanged to ``pdfium.PdfDocument``.

    Returns:
        The PIL image produced by rendering page 0.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    return first_page.render().to_pil()
38
+
39
def calculate_midpoint(x1,y1,x2,y2):
    """Return the integer midpoint of the segment (x1, y1)-(x2, y2).

    Each averaged coordinate is passed through ``int``, i.e. truncated
    toward zero.

    Returns:
        A ``(xm, ym)`` tuple of ints.
    """
    return tuple(int((start + end) / 2) for start, end in ((x1, x2), (y1, y2)))
43
+
44
def read_text(input_pdf_path):
    """Extract the word tuples of a PDF given as in-memory data.

    Args:
        input_pdf_path: PDF content passed as the stream argument of
            ``fitz.open('pdf', ...)``.

    Returns:
        The result of ``page.get_text("words")`` for the last page visited.

    NOTE(review): every page is visited but each iteration overwrites the
    previous result, so only the final page's words are returned — confirm
    this is intended for multi-page plans.
    """
    document = fitz.open('pdf', input_pdf_path)

    for page_index in range(document.page_count):
        current_page = document[page_index]
        text_instances = current_page.get_text("words")

        current_page.apply_redactions()
    return text_instances
53
+
54
def normalize_text(text):
    """
    Strip every whitespace character from ``text`` and lowercase the rest.

    Anything that is not a ``str`` is normalized to the empty string.
    """
    if isinstance(text, str):
        # Drops spaces, tabs and newlines alike before lowercasing.
        return re.sub(r'\s+', '', text).lower()
    return ""
63
+
64
+
65
def build_flexible_regex(term):
    """
    Build a case-insensitive regex that matches the whole of ``term``.

    The words of ``term`` may be separated by any run of whitespace, dots,
    colons or hyphens (including nothing at all), but no extra words or
    partial matches are accepted because the pattern is anchored at both ends.

    The term is split into words *before* whitespace is discarded. The
    previous implementation stripped all whitespace first, which left a
    single "word" and meant the separator pattern was never used.

    Args:
        term: Search term. A non-string yields a pattern matching only the
            empty string.

    Returns:
        A compiled ``re.Pattern``.
    """
    words = term.lower().split() if isinstance(term, str) else []
    pattern = r'[\s\.\:\-]*'.join(map(re.escape, words))
    full_pattern = rf'^{pattern}$'
    return re.compile(full_pattern, re.IGNORECASE)
74
+
75
def flexible_search(df, search_terms):
    """
    Look for each search term in the column names and in the first 3 rows.

    Column names and header cells are normalized once up front instead of
    once per term.

    Args:
        df: Table to inspect; read through ``df.columns`` and ``df.iat``.
        search_terms: Iterable of terms; each is turned into a regex by
            ``build_flexible_regex``.

    Returns:
        dict mapping each term to
        ``{"col_matches": [col_idx, ...], "cell_matches": [(row_idx, col_idx), ...]}``.
    """
    results = {term: {"col_matches": [], "cell_matches": []} for term in search_terms}

    normalized_columns = [normalize_text(col) for col in df.columns]
    column_count = len(df.columns)
    # Only the first three rows are treated as possible header rows.
    header_cells = [
        (row_idx, col_idx, normalize_text(df.iat[row_idx, col_idx]))
        for row_idx in range(min(3, len(df)))
        for col_idx in range(column_count)
    ]

    for term in search_terms:
        regex = build_flexible_regex(term)
        hits = results[term]

        # Search in column names
        for col_idx, norm_col in enumerate(normalized_columns):
            if regex.search(norm_col):
                hits["col_matches"].append(col_idx)

        # Search in the header rows
        for row_idx, col_idx, cell_text in header_cells:
            if regex.search(cell_text):
                hits["cell_matches"].append((row_idx, col_idx))

    return results
100
+
101
+
102
def generate_current_table_without_cropping(clm_idx, clmn_name, df):
    """Select columns by position and rename them, keeping every row.

    Args:
        clm_idx: Positional column indexer passed to ``df.iloc[:, ...]``.
        clmn_name: New column labels for the selection.
        df: Source table.

    Returns:
        The selected columns carrying the new labels.
    """
    picked = df.iloc[:, clm_idx]
    print("hello I generated the selected columns table without cropping")
    picked.columns = clmn_name
    return picked
107
+
108
+
109
+
110
def crop_rename_table(indices, clmn_name, clmn_idx,df):
    """Drop the header rows, then select and rename the wanted columns.

    Args:
        indices: Row indices where header cells were found; rows up to and
            including the largest one are discarded.
        clmn_name: New labels for the selected columns.
        clmn_idx: Positional column indexer passed to ``iloc``.
        df: Source table.

    Returns:
        The cropped selection with a fresh 0-based index and the new labels.
    """
    first_data_row = max(indices) + 1
    # Keep only the rows below the last header row and renumber them from 0.
    body = df.iloc[first_data_row:].reset_index(drop=True)

    chosen = body.iloc[:, clmn_idx]
    chosen.columns = clmn_name
    return chosen
122
+
123
def clean_column_row(row):
    """Stringify each cell and strip a leading ``<digits>-`` prefix from it.

    Returns:
        A list of cleaned strings, one per input cell.
    """
    cleaned = []
    for cell in row:
        cleaned.append(re.sub(r'^\d+-\s*', '', str(cell)))
    return cleaned
125
+
126
def details_in_another_table(clmn_name, clmn_idx, current_dfs, dfs):
    """Collect the selected columns from every other table of the same width.

    For each table in ``dfs`` (other than ``current_dfs``) with the same
    number of columns, the original column labels are cleaned with
    ``clean_column_row`` and kept as the first data row, because in those
    tables the labels are the first record rather than a header.

    Args:
        clmn_name: Labels to give the selected columns.
        clmn_idx: Positional column indexer passed to ``iloc``.
        current_dfs: The table the header was found in (excluded by identity).
        dfs: All extracted tables.

    Returns:
        One concatenated table, or ``None`` when no other table has the
        same column count.
    """
    width = current_dfs.shape[1]
    candidates = [table for table in dfs
                  if table is not current_dfs and table.shape[1] == width]
    if not candidates:
        return None

    pieces = []
    for table in candidates:
        data_part = table.iloc[:, clmn_idx].copy()

        # The old column labels become a one-row frame prepended to the data.
        header_part = pd.DataFrame([clean_column_row(data_part.columns.tolist())])

        data_part.columns = clmn_name
        header_part.columns = clmn_name

        pieces.append(pd.concat([header_part, data_part], ignore_index=True))

    return pd.concat(pieces, ignore_index=True)
154
+
155
def map_user_input_to_standard_labels(user_inputs):
    """Map free-form column names typed by the user to standard labels.

    Each input is whitespace-collapsed and lowercased, then tested against
    the label patterns in declaration order. The first label that matches
    and is not already taken receives the original (un-normalized) input.
    Inputs that match nothing are dropped.

    Returns:
        dict of ``standard_label -> original user input``.
    """
    label_patterns = {
        'door_id': r'\b(?:door\s*)?(?:id|no|number)\b|\bdoor\s*name\b',
        'door_type': r'\b(?:\S+\s+)?door\s*type\b|\btype(?:\s+\w+)?\b',
        'structural_opening': r'\bstructural\s+opening\b',
        'width': r'\bwidth\b',
        'height': r'\bheight\b',
    }

    mapped = {}
    for raw in user_inputs:
        candidate = re.sub(r'\s+', ' ', raw.strip(), flags=re.MULTILINE).lower()
        hit = next(
            (label for label, pattern in label_patterns.items()
             if label not in mapped and re.search(pattern, candidate, re.IGNORECASE)),
            None,
        )
        if hit is not None:
            mapped[hit] = raw

    return mapped
181
+
182
def analyse_cell_columns(cell_columns_appearance):
    """Keep only the first cell hit and first column hit of every term.

    Args:
        cell_columns_appearance: dict of ``term -> {"cell_matches": [...],
            "col_matches": [...]}`` as returned by ``flexible_search``.

    Returns:
        ``(cell_matches, col_matches)``: two lists holding the first match
        of each term that has one, in term order.
    """
    first_cells, first_cols = [], []
    for hits in cell_columns_appearance.values():
        if hits['cell_matches']:
            first_cells.append(hits['cell_matches'][0])
        if hits['col_matches']:
            first_cols.append(hits['col_matches'][0])
    return first_cells, first_cols
191
+
192
+ # when column names are located in the cells
193
def get_row_column_indices(cell_clmn_indx):
    """Split ``(row, column)`` pairs into a row list and a column list.

    Used when the column names were found inside table cells.
    """
    pairs = list(cell_clmn_indx)
    rows = [pair[0] for pair in pairs]
    columns = [pair[1] for pair in pairs]
    return rows, columns
200
+
201
+ # when the column names are located in the table's column headers themselves
202
def get_column_index(col_matches):
    """Return the matched column indices as a new list.

    Used when the column names were found in the table's own header.
    """
    return list(col_matches)
207
+
208
+
209
def extract_tables(schedule):
    """Extract every table of a schedule PDF as pandas DataFrames.

    Tables are accumulated across all pages. Previously the result list was
    re-created for each page, so only the last page's tables were returned
    and a document with no pages left the result unbound.

    Args:
        schedule: PDF content passed as the stream argument of
            ``fitz.open("pdf", ...)``.

    Returns:
        list of DataFrames in page order, then table order within a page.
    """
    dfs = []
    doc = fitz.open("pdf", schedule)
    for page in doc:
        tabs = page.find_tables()
        for tab in tabs:
            dfs.append(tab.to_pandas())
    return dfs
218
+
219
def get_selected_columns(dfs, user_patterns):
    """Find the table holding the requested columns and return them renamed.

    Each table is searched for every user pattern, both in its column
    labels and in its first rows (see ``flexible_search``).

    * All patterns found in the column labels: a table with fewer than 10
      rows is treated as a header-only table and the data is taken from the
      other same-width tables; one with more than 10 rows is used directly.
      The search continues with the next table.
    * All patterns found in cells: the same size rule applies, using
      ``crop_rename_table`` for the large case, and the search stops.

    Fix: ``generate_current_table_without_cropping`` was called without the
    ``clmn_name`` argument, which raised ``TypeError``.

    NOTE(review): a table with exactly 10 rows satisfies neither size test
    and is skipped — confirm whether one bound should be inclusive.
    NOTE(review): the 3-pattern label is "structural opening" (with a
    space) while other helpers in this file use "structural_opening".

    Args:
        dfs: Tables from ``extract_tables``.
        user_patterns: 2, 3 or 4 column names as typed by the user.

    Returns:
        The selected, renamed table, or ``None`` when nothing matched.
    """
    selected_columns_new = None

    # Output labels depend only on how many patterns were supplied; any
    # other count leaves ``clmn_name`` as None.
    clmn_name = {
        2: ["door_id", "door_type"],
        3: ["door_id", "door_type", "structural opening"],
        4: ["door_id", "door_type", "width", "height"],
    }.get(len(user_patterns))

    for i, table in enumerate(dfs):
        cell_columns_appearance = flexible_search(table, user_patterns)
        cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)

        if len(cell_matches) == 0 and len(col_matches) == 0:
            print(f"this is df {i}, SEARCH IN ANOTHER DF")
            continue

        # Patterns found in the column labels
        if len(col_matches) == len(user_patterns):
            column_index_list = get_column_index(col_matches)
            print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")

            print(column_index_list)
            # details in another table
            if len(table) < 10:
                selected_columns_new = details_in_another_table(clmn_name, column_index_list, table, dfs)

            # details in the same table
            if len(table) > 10:
                selected_columns_new = generate_current_table_without_cropping(column_index_list, clmn_name, table)

        # Patterns found inside cells
        if len(cell_matches) == len(user_patterns):
            row_index_list, column_index_list = get_row_column_indices(cell_matches)
            print(f"this is df {i} mawgooda fel cells, check el df length 3ashan law el details fe table tany")

            # details in another table
            if len(table) < 10:
                selected_columns_new = details_in_another_table(clmn_name, column_index_list, table, dfs)
                break
            # details in the same table
            if len(table) > 10:
                print(f"this is df {i} call crop_rename_table(indices, clmn_name, clmn_idx,df)")
                selected_columns_new = crop_rename_table(row_index_list, clmn_name, column_index_list, table)
                break
    return selected_columns_new
268
+
269
+
270
+
271
def separate_main_secondary(input_user_clmn_names):
    """Split the user's column names into main and secondary parts.

    Returns:
        ``(main_info, secondary_info)``: the first four entries and
        everything after them.
    """
    split_at = 4
    return input_user_clmn_names[:split_at], input_user_clmn_names[split_at:]
275
+
276
+
277
+ # take main info
278
def get_column_name(user_input_m):
    """Derive the standard main column labels from the user's main inputs.

    The inputs are positional: door id, door type, width, height. An empty
    string drops that position's label. When the fourth input (height) is
    empty but the third is filled, the third column is labelled
    ``structural_opening`` instead of ``width``.

    Fix: when both the third and fourth inputs were empty the result still
    contained ``structural_opening``, giving more labels than supplied
    columns. The relabel now applies only when the third input is present.

    Args:
        user_input_m: Up to four user-entered names; ``''`` marks an unused
            position.

    Returns:
        list of standard labels for the positions that are not empty.
    """
    empty_positions = {i for i, v in enumerate(user_input_m) if v == ''}

    labels = ["door_id", "door_type", "width", "height"]
    # No separate height means the third column carries a combined size.
    if 3 in empty_positions and 2 not in empty_positions:
        labels[2] = "structural_opening"

    return [label for i, label in enumerate(labels) if i not in empty_positions]
293
+
294
+ # take secondary info
295
def get_column_name_secondary(user_input_m):
    """Derive the standard secondary column labels from the user's inputs.

    Position 0 corresponds to ``fire_rate`` and position 1 to
    ``acoustic_rate``; an empty-string input removes that position's label.

    Returns:
        list of the labels whose input was not ``''``.
    """
    labels = ["fire_rate", "acoustic_rate"]
    for position, value in enumerate(user_input_m):
        if value == '':
            labels[position] = ""

    # Blanked slots are falsy and fall out here.
    return [label for label in labels if label]
308
+
309
+
310
+ #handling both main and secondary info together in one table
311
def get_selected_columns_all(dfs, user_patterns):
    """Select main and secondary columns together from the schedule tables.

    ``user_patterns`` is split into main (first four) and secondary entries;
    empty strings mark unused positions. Each table is searched for the
    non-empty entries in its column labels and in its first rows.

    * All entries found in the column labels: a table with fewer than 10
      rows takes its data from the other same-width tables; one with more
      than 10 rows is used directly. The search continues.
    * All entries found in cells: the table is cropped below the header
      rows with ``crop_rename_table`` and the search stops.

    Fixes:
      * ``generate_current_table_without_cropping`` was called without
        ``clmn_name`` and raised ``TypeError``.
      * In the cell branch a ``details_in_another_table`` result was
        computed and immediately overwritten; that call is removed.
      * The label and pattern lists do not depend on the table and are now
        computed once.

    Args:
        dfs: Tables from ``extract_tables``.
        user_patterns: User-entered column names, main entries first.

    Returns:
        The selected, renamed table, or ``None`` when nothing matched.
    """
    selected_columns_new = None
    if not dfs:
        return selected_columns_new

    main_info, secondary_info = separate_main_secondary(user_patterns)
    clmn_name = get_column_name(main_info) + get_column_name_secondary(secondary_info)
    non_empty_info = [item for item in main_info if item] + [item for item in secondary_info if item]

    for i, table in enumerate(dfs):
        print(f"clmn name: {clmn_name}")
        print(f"non-empty info: {non_empty_info}")

        cell_columns_appearance = flexible_search(table, non_empty_info)
        cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)

        print(f"length of cell_matches: {len(cell_matches)}")
        print(f"cell_matches: {cell_matches}")

        print(clmn_name)

        if len(cell_matches) == 0 and len(col_matches) == 0:
            print(f"this is df {i}, SEARCH IN ANOTHER DF")
            continue

        # Entries found in the column labels
        if len(col_matches) == len(non_empty_info):
            column_index_list = get_column_index(col_matches)
            print(f"this is df {i} mawgooda fel columns, check el df length 3ashan law el details fe table tany")
            print(column_index_list)
            # details in another table
            if len(table) < 10:
                selected_columns_new = details_in_another_table(clmn_name, column_index_list, table, dfs)
            # details in the same table
            if len(table) > 10:
                selected_columns_new = generate_current_table_without_cropping(column_index_list, clmn_name, table)

        # Entries found inside cells
        if len(cell_matches) == len(non_empty_info):
            row_index_list, column_index_list = get_row_column_indices(cell_matches)
            print(f"this is df {i} mawgooda fel cells, check el df length 3ashan law el details fe table tany")

            selected_columns_new = crop_rename_table(row_index_list, clmn_name, column_index_list, table)
            break
    return selected_columns_new
390
+
391
+
392
+ # TODO: make this search the column names of selected_columns itself
393
+ # Also need to know how the name is written in the real df (presumably exactly as the user entered it)
394
def get_st_op_pattern(selected_columns, user_input):
    """Return the user's name for the structural-opening column, if present.

    Args:
        selected_columns: Table whose ``columns`` are checked for the label
            ``'structural_opening'``.
        user_input: User-entered names; index 2 is returned on a hit.

    Returns:
        ``user_input[2]`` when the column exists, otherwise ``None``.
    """
    has_structural_opening = 'structural_opening' in selected_columns.columns
    return user_input[2] if has_structural_opening else None
400
+
401
+
402
def find_text_in_plan(label, x):
    """Locate every word tuple on the plan whose text equals ``label``.

    Args:
        label: Exact text to look for.
        x: Word tuples where items 0-3 are the box coordinates and item 4
            is the text.

    Returns:
        ``(substring_coordinates, words, point_list)``: box midpoints, the
        matched texts, and the midpoints computed with x and y swapped
        (for the rotated case).
    """
    matches = [tpl for tpl in x if tpl[4] == label]
    substring_coordinates = [calculate_midpoint(m[0], m[1], m[2], m[3]) for m in matches]  # for pdf
    point_list = [calculate_midpoint(m[1], m[0], m[3], m[2]) for m in matches]  # for rotated
    words = [m[4] for m in matches]
    return substring_coordinates, words, point_list
413
+
414
+
415
+
416
def get_word_locations_plan(flattened_list, plan_texts):
    """Find where each schedule label appears on the plan.

    Rows come in one of three shapes, determined by the first row:
    ``(lbl, clr)``, ``(lbl, w, clr)`` or ``(lbl, w, h, clr)``. The output
    tuples are ``(location, lbl, clr)``, ``(location, lbl, clr, w)`` and
    ``(location, lbl, clr, w, h)`` respectively.

    An empty ``flattened_list`` now returns two empty lists instead of
    raising ``IndexError``; the three duplicated loops are merged.

    Args:
        flattened_list: Rows describing each door, shaped as above.
        plan_texts: Word tuples of the plan, passed to ``find_text_in_plan``.

    Returns:
        ``(locations, not_found)``; ``not_found`` lists the labels with no
        occurrence on the plan (they still get an entry in ``locations``).

    Raises:
        ValueError: if a row's length differs from the first row's.
    """
    locations = []
    not_found = []

    if not flattened_list:
        return locations, not_found

    arity = len(flattened_list[0])
    if arity not in (2, 3, 4):
        return locations, not_found

    for row in flattened_list:
        if len(row) != arity:
            raise ValueError(f"expected rows of {arity} values, got {len(row)}")
        lbl, *dims, clr = row
        location, _words, _points = find_text_in_plan(lbl, plan_texts)
        if len(location) == 0:
            not_found.append(lbl)
        # The colour moves in front of the dimensions in the output tuple.
        locations.append((location, lbl, clr, *dims))
    return locations, not_found
440
+
441
def get_repeated_labels(locations):
    """Return the set of labels occurring more than once in ``locations``.

    The label is item 1 of every entry.
    """
    seen, repeated = set(), set()
    for entry in locations:
        label = entry[1]
        # First sighting goes to ``seen``; any later one marks a repeat.
        (repeated if label in seen else seen).add(label)
    return repeated
452
+
453
def get_cleaned_data(locations):
    """Give every schedule row a single plan coordinate.

    A label found several times on the plan yields the same coordinate list
    for each schedule row carrying that label. Coordinates are handed out
    round-robin, so repeated rows land on different occurrences. Rows whose
    label was not found (empty coordinate list) are dropped.

    Entries are ``(coords, label, color[, w[, h]])``; the shape is taken
    from the first entry and items after ``label`` are passed through.

    An empty ``locations`` now returns ``[]`` instead of raising
    ``IndexError``; the three duplicated loops are merged.

    Returns:
        list of entries with ``coords`` reduced to a one-element list.

    Raises:
        ValueError: if an entry's length differs from the first entry's.
    """
    if not locations:
        return []

    arity = len(locations[0])
    if arity not in (3, 4, 5):
        return []

    processed = defaultdict(int)
    new_data = []
    for entry in locations:
        if len(entry) != arity:
            raise ValueError(f"expected entries of {arity} values, got {len(entry)}")
        coords, label, *rest = entry
        if len(coords) > 1:
            index = processed[label] % len(coords)  # round-robin indexing
            new_data.append(([coords[index]], label, *rest))
            processed[label] += 1  # next row with this label takes the next spot
        elif len(coords) == 1:
            new_data.append((coords, label, *rest))
    return new_data
487
+
488
+
489
+ # A fractional value (e.g. 0.5) is printed as written; a whole number has its decimal point removed
490
def get_width_info_tobeprinted(new_data):
    """Build the size text to print for each located door.

    * 4-item entries ``(coords, label, color, w)``: ``w`` is returned as is.
    * 5-item entries ``(coords, label, color, w, h)``: commas are removed
      from ``w`` and ``h``; whole numbers are printed without a decimal
      part and the result is ``"<w> mm wide x <h> mm high"``.

    The shape is taken from the first entry. An empty ``new_data`` now
    returns ``[]`` instead of raising ``IndexError``.

    Raises:
        ValueError: if a width or height is not numeric after comma removal.
    """
    def _tidy(raw):
        # "1,000.0" -> 1000, "2100.5" -> "2100.5"
        cleaned = re.sub(r",", "", raw)
        return int(float(cleaned)) if float(cleaned).is_integer() else cleaned

    if not new_data:
        return []

    arity = len(new_data[0])
    if arity == 4:
        return [w for _, _, _, w in new_data]
    if arity == 5:
        return [f"{_tidy(w)} mm wide x {_tidy(h)} mm high" for _, _, _, w, h in new_data]
    return []
511
+
512
def clean_dimensions(text):
    """Strip ``mm`` units and thousands separators from a dimension string.

    Every ``mm`` goes, along with any commas or whitespace directly before
    it; any commas left afterwards are removed too.
    """
    without_units = re.sub(r'[,\s]*mm', '', text)
    return without_units.replace(",", "")
517
+
518
def get_cleaned_width(width_info_tobeprinted):
    """Apply ``clean_dimensions`` to every size string and return the list."""
    return [clean_dimensions(entry) for entry in width_info_tobeprinted]
523
+
524
+
525
def get_widths_bb_format(cleaned_width, kelma):
    """Turn ``"<a>x<b>"`` size strings into ``"<w> mm wide x <h> mm high"``.

    ``kelma`` is the schedule column's own name. If it contains a
    width-first marker (``W x H``, ``Width x Height``, also with ``×``),
    the first number is the width; otherwise the numbers are swapped.

    Each value is split at the last occurrence of ``x``, ``X`` or ``×``.
    Both parts are converted with ``int(float(...))``, so fractions are
    truncated.

    Fix: a value without any separator used to be sliced at index -1 and
    produced a wrong but plausible result; it now raises ``ValueError``.

    NOTE(review): the marker match is case-sensitive, so a lowercase
    ``w x h`` header is treated as height-first — confirm.

    Args:
        cleaned_width: Size strings already cleaned of units and commas.
        kelma: Name of the structural-opening column as written in the table.

    Returns:
        list of formatted size strings, one per input.

    Raises:
        ValueError: if a value has no separator or a part is not numeric.
    """
    width_first = re.search(r"\bW(?:idth)?\s*[×x]\s*H(?:eight)?\b", kelma) is not None

    widths = []
    for widthaa in cleaned_width:
        index = max(widthaa.find(separator) for separator in ("x", "×", "X"))
        if index == -1:
            raise ValueError(f"no 'x' separator found in dimension text: {widthaa!r}")
        first_value = int(float(widthaa[:index]))
        second_value = int(float(widthaa[index + 1:]))
        if width_first:
            full_text = f"{first_value} mm wide x {second_value} mm high"
        else:
            full_text = f"{second_value} mm wide x {first_value} mm high"
        widths.append(full_text)
    return widths
541
+
542
+
543
def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
    """Split located rows into size texts and secondary-info values.

    Each row is read positionally as
    ``(coords, label, <main extras>, <secondary values>, last item)``:
    ``len(main_info) - 2`` main extras (0, 1 or 2) followed by
    ``len(secondary_info)`` secondary values (1 or 2).

    * 0 main extras: no size text is produced.
    * 1 main extra: it is used as the size text unchanged.
    * 2 main extras (width, height): commas are removed, whole numbers lose
      their decimal part, and the text is ``"<w> mm wide x <h> mm high"``.

    With one secondary value it is appended as is; with two they are
    appended as a tuple in row order.

    The six near-identical branches are merged into one positional
    implementation; outputs for the supported shapes are unchanged.

    Args:
        new_data: Rows as produced by ``get_cleaned_data_secondary``.
        main_info: Main column names (2, 3 or 4 entries).
        secondary_info: Secondary column names (1 or 2 entries).

    Returns:
        ``(width_info_tobeprinted, secondary_info_tobeprinted)``; both
        empty for unsupported shape combinations.

    Raises:
        ValueError: if a row has the wrong length or a width/height is not
            numeric.
    """
    def _tidy(raw):
        cleaned = re.sub(r",", "", raw)
        return int(float(cleaned)) if float(cleaned).is_integer() else cleaned

    width_info_tobeprinted = []
    secondary_info_tobeprinted = []

    n_main, n_secondary = len(main_info), len(secondary_info)
    if n_main not in (2, 3, 4) or n_secondary not in (1, 2):
        return width_info_tobeprinted, secondary_info_tobeprinted

    # coords + label + (n_main - 2) extras + secondary values + last item
    expected = n_main + n_secondary + 1
    for row in new_data:
        if len(row) != expected:
            raise ValueError(f"expected rows of {expected} values, got {len(row)}")
        dims = row[2:n_main]
        extras = row[n_main:n_main + n_secondary]

        if len(dims) == 1:
            width_info_tobeprinted.append(dims[0])
        elif len(dims) == 2:
            width_info_tobeprinted.append(f"{_tidy(dims[0])} mm wide x {_tidy(dims[1])} mm high")

        secondary_info_tobeprinted.append(extras[0] if n_secondary == 1 else tuple(extras))
    return width_info_tobeprinted, secondary_info_tobeprinted
598
+
599
def get_word_locations_plan_secondary(flattened_list, plan_texts, main_info, secondary_info):
    """Find where each schedule label appears on the plan, with secondary info.

    Used when secondary information is present. Each row must hold
    ``len(main_info) + len(secondary_info)`` values with the label first;
    the remaining values are passed through unchanged and in order, so the
    output entries are ``(location, *row)``.

    The six near-identical branches are merged; outputs for the supported
    shapes (2-4 main entries with 1-2 secondary entries) are unchanged.

    Args:
        flattened_list: Rows describing each door, label first.
        plan_texts: Word tuples of the plan, passed to ``find_text_in_plan``.
        main_info: Main column names (2, 3 or 4 entries).
        secondary_info: Secondary column names (1 or 2 entries).

    Returns:
        ``(locations, not_found)``; ``not_found`` lists labels with no
        occurrence on the plan (they still get an entry in ``locations``).
        Both are empty for unsupported shape combinations.

    Raises:
        ValueError: if a row does not have the expected number of values.
    """
    locations = []
    not_found = []
    len_main = len(main_info)  # 3 or 4, sometimes 2
    len_secondary = len(secondary_info)  # 2 or 1

    if len_main not in (2, 3, 4) or len_secondary not in (1, 2):
        return locations, not_found

    expected = len_main + len_secondary
    for row in flattened_list:
        if len(row) != expected:
            raise ValueError(f"expected rows of {expected} values, got {len(row)}")
        lbl = row[0]
        location, _words, _points = find_text_in_plan(lbl, plan_texts)
        if len(location) == 0:
            not_found.append(lbl)
        locations.append((location, *row))
    return locations, not_found
652
+
653
+ ### newest, accept combined table
654
def get_similar_colors_all(selected_columns_new):
    """Group the schedule rows by door type and give each type a random colour.

    Args:
        selected_columns_new: Table with at least ``door_type`` and
            ``door_id`` columns.

    Returns:
        dict of ``door_type -> entry``. Each entry holds ``'values'`` (the
        door ids), ``'color'`` (an RGB tuple of ints in 0-255) and, for
        every column of the table, the list of that column's values for
        the rows of this type.
    """
    # One random RGB triple per distinct door type.
    key_colors = {}
    for door_type in selected_columns_new['door_type'].unique():
        key_colors[door_type] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

    clmns_fields = selected_columns_new.columns.to_list()

    grouped = {}
    for _, row in selected_columns_new.iterrows():
        key = row['door_type']
        if key not in grouped:
            entry = {'values': [], 'color': None}
            for field in clmns_fields:
                entry[field] = []
            grouped[key] = entry
        entry = grouped[key]

        entry['values'].append(row['door_id'])
        for field in clmns_fields:
            entry[field].append(row.get(field, None))
        entry['color'] = key_colors[key]

    return grouped
685
+
686
+ ### newest, accept combined table
687
def get_flattened_tuples_list_all(col_dict):
    """Flatten the per-door-type grouping back into one tuple per door.

    For each group, every list-valued field except ``'door_type'`` and
    ``'values'`` contributes one element per row, in the group's key
    order; the group's colour is appended as the last element.

    Args:
        col_dict: Mapping as returned by ``get_similar_colors_all``.

    Returns:
        list of tuples ``(field values..., color)``.
    """
    skipped = ('door_type', 'values')
    flattened_list = []

    for entry in col_dict.values():
        list_fields = [name for name, value in entry.items()
                       if isinstance(value, list) and name not in skipped]
        # The first list field defines how many rows the group has.
        row_count = len(entry[list_fields[0]]) if list_fields else 0

        flattened_list.extend(
            tuple(entry[name][i] for name in list_fields) + (entry['color'],)
            for i in range(row_count)
        )

    return flattened_list
702
+
703
+
704
+ #SECONDARY
705
def get_cleaned_data_secondary(locations, main_info, secondary_info):
    """Give every schedule row a single plan coordinate (secondary variant).

    Works like ``get_cleaned_data`` for rows that also carry secondary
    values. Each entry is ``(coords, label, ...)`` with
    ``len(main_info) + len(secondary_info) + 1`` items in total; items
    after ``label`` are passed through unchanged.

    A label found several times on the plan has its coordinates handed out
    round-robin to the schedule rows carrying it. Entries with an empty
    coordinate list are dropped.

    The six near-identical branches are merged; outputs for the supported
    shapes (2-4 main entries with 1-2 secondary entries) are unchanged.

    Returns:
        list of entries with ``coords`` reduced to a one-element list;
        empty for unsupported shape combinations.

    Raises:
        ValueError: if an entry does not have the expected number of items.
    """
    n_main, n_secondary = len(main_info), len(secondary_info)
    if n_main not in (2, 3, 4) or n_secondary not in (1, 2):
        return []

    expected = n_main + n_secondary + 1
    processed = defaultdict(int)
    new_data = []
    for entry in locations:
        if len(entry) != expected:
            raise ValueError(f"expected entries of {expected} values, got {len(entry)}")
        coords, label, *rest = entry
        if len(coords) > 1:
            index = processed[label] % len(coords)  # round-robin indexing
            new_data.append(([coords[index]], label, *rest))
            processed[label] += 1  # next row with this label takes the next spot
        elif len(coords) == 1:
            new_data.append((coords, label, *rest))
    return new_data
771
+
772
def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
    """Format the secondary values as printable rating strings.

    * One secondary column: every value becomes ``"acoustic rating: <v>;"``
      if any column name of ``selected_secondary_info`` contains
      ``'acoustic'``, and ``"fire rating: <v>;"`` if any contains
      ``'fire'``.
    * Two secondary columns: every ``(fire, acoustic)`` pair becomes
      ``"fire rating: <f>; acoustic rating: <a>;"`` and is also printed.

    Returns:
        list of formatted strings.
    """
    secondary_printed_clean = []
    secondary_count = len(secondary_info)

    if secondary_count == 1:
        column_names = selected_secondary_info.columns
        if any('acoustic' in col for col in column_names):
            secondary_printed_clean.extend(
                f"acoustic rating: {acous};" for acous in secondary_tobeprinted
            )
        if any('fire' in col for col in column_names):
            secondary_printed_clean.extend(
                f"fire rating: {fire};" for fire in secondary_tobeprinted
            )

    if secondary_count == 2:
        for fire, acous in secondary_tobeprinted:
            new_text = f"fire rating: {fire}; acoustic rating: {acous};"
            secondary_printed_clean.append(new_text)
            print(new_text)
    return secondary_printed_clean
789
+
790
+
791
def mix_width_secondary(widths, secondary_printed_clean):
    """Join each size text with the secondary text at the same position.

    Returns:
        list of ``"<width>; <secondary>"`` strings, one per width.
    """
    return [
        f"{width}; {secondary_printed_clean[position]}"
        for position, width in enumerate(widths)
    ]
797
+
798
def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, secondary_info):
    """Draw a Count-style circle markup on page 1 for every matched coordinate.

    Args:
        pdf_bytes: Raw bytes of the PDF to annotate.
        locations: Iterable of tuples. The first item is a sequence of
            ``(x, y)`` points, the second is the label, and the LAST item is
            the colour as three components that are divided by 255. Items in
            between (width / height / ratings) are not used here.
        main_info: Selected main columns; only its length (2, 3 or 4) is used.
        secondary_info: Selected secondary columns; only its length (1 or 2)
            is used.

    Returns:
        The PDF re-saved as bytes. When the ``main_info`` / ``secondary_info``
        lengths are outside the supported combinations, no markup is added
        but the document is still re-saved and returned.

    Raises:
        ValueError: If a location tuple does not have
            ``len(main_info) + len(secondary_info) + 1`` items. The one
            exception is the 3-main / 1-secondary combination, where such
            tuples are skipped instead.
    """
    n_main, n_secondary = len(main_info), len(secondary_info)
    is_supported = n_main in (2, 3, 4) and n_secondary in (1, 2)
    expected_len = n_main + n_secondary + 1
    # Only this combination tolerates (skips) tuples of unexpected length;
    # every other combination treats them as an error.
    skip_malformed = (n_main, n_secondary) == (3, 1)

    pdf_document = fitz.open("pdf", io.BytesIO(pdf_bytes).read())  # Open PDF in memory
    try:
        page = pdf_document[0]  # First page
        for loc in (locations if is_supported else ()):
            if len(loc) != expected_len:
                if skip_malformed:
                    continue
                raise ValueError(
                    f"expected {expected_len} values in location tuple, got {len(loc)}"
                )
            coor, lbl, clr = loc[0], loc[1], loc[-1]
            stroke = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
            for cor in coor:
                # Create a Circle annotation (Count Markup): 20x20 box centred on the point
                annot = page.add_circle_annot(
                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)
                )

                annot.set_colors(stroke=stroke, fill=(1, 1, 1))  # Coloured border, white fill
                annot.set_border(width=2)  # Border thickness
                annot.set_opacity(1)  # Fully visible

                # set_info only honours a dict or keyword arguments. Passing
                # ("subject", "Count") positionally stored "Count" as the
                # content and never set the subject, so use keywords.
                annot.set_info(content=lbl, title=lbl, subject="Count")
                annot.update()  # Apply changes

        # Save modified PDF to a variable instead of a file
        output_stream = io.BytesIO()
        pdf_document.save(output_stream)
    finally:
        # Release the document even when annotating or saving fails.
        pdf_document.close()

    return output_stream.getvalue()  # Return the modified PDF as bytes
939
+
940
+
941
def modify_author_in_pypdf2(pdf_bytes, new_authors):
    """Write an author (``/T``) onto the annotations of a PDF, in page order.

    NOTE: this module defines ``modify_author_in_pypdf2`` a second time further
    down with the same logic; that later definition is the one bound at import.

    Args:
        pdf_bytes: Raw bytes of the PDF to rewrite.
        new_authors: Sequence of author strings. They are handed out one per
            annotation across all pages; once exhausted, the last entry is
            reused. With an empty sequence no annotation is touched.

    Returns:
        The rewritten PDF as bytes.
    """
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    writer = PyPDF2.PdfWriter()

    author_count = len(new_authors)
    next_author = 0  # Position of the next unused author

    for page in reader.pages:
        annotation_refs = page["/Annots"] if "/Annots" in page else ()
        for ref in annotation_refs:
            annot_obj = ref.get_object()
            if author_count == 0:
                break
            if next_author < author_count:
                author = new_authors[next_author]
                next_author += 1
            else:
                # Authors exhausted: keep assigning the last one
                author = new_authors[-1]
            annot_obj.update({"/T": TextStringObject(author)})

        writer.add_page(page)

    output_stream = io.BytesIO()
    writer.write(output_stream)
    return output_stream.getvalue()
971
+
972
+
973
+
974
+
975
+
976
def add_bluebeam_count_annotations(pdf_bytes, locations):
    """Draw a Count-style circle markup on page 1 for every matched coordinate.

    Args:
        pdf_bytes: Raw bytes of the PDF to annotate.
        locations: Sized sequence of tuples with 3, 4 or 5 items. The first
            item is a sequence of ``(x, y)`` points, the second is the label
            and the third is the colour as three components that are divided
            by 255. Any fourth / fifth item is not used here. Tuples of any
            other length are ignored.

    Returns:
        The PDF re-saved as bytes.
    """
    pdf_document = fitz.open("pdf", io.BytesIO(pdf_bytes).read())  # Open PDF in memory
    try:
        page = pdf_document[0]  # First page
        # The diagnostic indexes the first entry, so it must be skipped when
        # nothing was located; otherwise an empty result raised IndexError.
        if len(locations) > 0:
            print(f"length of locations 0 from not sec presence: {len(locations[0])}")

        for loc in locations:
            if len(loc) not in (3, 4, 5):
                continue
            coor, lbl, clr = loc[0], loc[1], loc[2]
            stroke = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
            for cor in coor:
                # Create a Circle annotation (Count Markup): 20x20 box centred on the point
                annot = page.add_circle_annot(
                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)
                )

                annot.set_colors(stroke=stroke, fill=(1, 1, 1))  # Coloured border, white fill
                annot.set_border(width=2)  # Border thickness
                annot.set_opacity(1)  # Fully visible

                # set_info only honours a dict or keyword arguments. Passing
                # ("subject", "Count") positionally stored "Count" as the
                # content and never set the subject, so use keywords.
                annot.set_info(content=lbl, title=lbl, subject="Count")
                annot.update()  # Apply changes

        # Save modified PDF to a variable instead of a file
        output_stream = io.BytesIO()
        pdf_document.save(output_stream)
    finally:
        # Release the document even when annotating or saving fails.
        pdf_document.close()

    return output_stream.getvalue()  # Return the modified PDF as bytes
1050
+
1051
+
1052
+
1053
def modify_author_in_pypdf2(pdf_bytes, new_authors):
    """Write an author (``/T``) onto the annotations of a PDF, in page order.

    Args:
        pdf_bytes: Raw bytes of the PDF to rewrite.
        new_authors: Sequence of author strings. They are handed out one per
            annotation across all pages; once exhausted, the last entry is
            reused. With an empty sequence no annotation is touched.

    Returns:
        The rewritten PDF as bytes.
    """
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
    writer = PyPDF2.PdfWriter()

    author_count = len(new_authors)
    next_author = 0  # Position of the next unused author

    for page in reader.pages:
        annotation_refs = page["/Annots"] if "/Annots" in page else ()
        for ref in annotation_refs:
            annot_obj = ref.get_object()
            if author_count == 0:
                break
            if next_author < author_count:
                author = new_authors[next_author]
                next_author += 1
            else:
                # Authors exhausted: keep assigning the last one
                author = new_authors[-1]
            annot_obj.update({"/T": TextStringObject(author)})

        writer.add_page(page)

    output_stream = io.BytesIO()
    writer.write(output_stream)
    return output_stream.getvalue()
1083
+
1084
+
1085
+
1086
def merge_pdf_bytes_list(pdfs):
    """Concatenate several in-memory PDFs into one.

    Args:
        pdfs: Iterable of PDF documents, each given as bytes.

    Returns:
        Bytes of a single PDF holding every page of every input, in input
        order.
    """
    combined = PdfWriter()

    for document_bytes in pdfs:
        reader = PdfReader(io.BytesIO(document_bytes))
        for page in reader.pages:
            combined.add_page(page)

    merged_buffer = io.BytesIO()
    combined.write(merged_buffer)
    return merged_buffer.getvalue()
1100
+
1101
+
1102
def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
    """Annotate a plan that has secondary info, then stamp annotation authors.

    Args:
        input_pdf_path: Either the PDF content as ``bytes`` or a path that is
            opened and read in binary mode.
        output_pdf_path: Accepted but not used by this function; nothing is
            written to disk.
        locations: Forwarded to ``add_bluebeam_count_annotations_secondary``.
        new_authors: Forwarded to ``modify_author_in_pypdf2``.
        main_info: Forwarded to ``add_bluebeam_count_annotations_secondary``.
        secondary_info: Forwarded to ``add_bluebeam_count_annotations_secondary``.

    Returns:
        The annotated PDF as bytes.
    """
    source_bytes = input_pdf_path
    if not isinstance(input_pdf_path, bytes):
        with open(input_pdf_path, "rb") as source_file:
            source_bytes = source_file.read()

    # Step 1: add the count markups; step 2: rewrite their author field.
    with_markups = add_bluebeam_count_annotations_secondary(
        source_bytes, locations, main_info, secondary_info
    )
    return modify_author_in_pypdf2(with_markups, new_authors)
1117
+
1118
+
1119
def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
    """Annotate a plan without secondary info, then stamp annotation authors.

    Args:
        input_pdf_path: Either the PDF content as ``bytes`` or a path that is
            opened and read in binary mode.
        output_pdf_path: Accepted but not used by this function; nothing is
            written to disk.
        locations: Forwarded to ``add_bluebeam_count_annotations``.
        new_authors: Forwarded to ``modify_author_in_pypdf2``.

    Returns:
        The annotated PDF as bytes.
    """
    source_bytes = input_pdf_path
    if not isinstance(input_pdf_path, bytes):
        with open(input_pdf_path, "rb") as source_file:
            source_bytes = source_file.read()

    # Step 1: add the count markups; step 2: rewrite their author field.
    with_markups = add_bluebeam_count_annotations(source_bytes, locations)
    return modify_author_in_pypdf2(with_markups, new_authors)
1133
+
1134
def mainRun(schedule, plan, searcharray):
    """Annotate each plan with the doors found for every search entry.

    For every plan the search entries are applied one after another, each
    pass annotating the output of the previous one. The annotated plans are
    merged into one PDF, whose first page is rendered to an image and whose
    annotations are listed in a DataFrame.

    Args:
        schedule: Passed unchanged to ``extract_tables``.
        plan: Non-empty sequence of PDF documents; each item is opened with
            ``fitz.open("pdf", item)`` and later merged as bytes.
        searcharray: Sequence of user selections. Each must be indexable up
            to position 5; a truthy value at position 4 or 5 switches that
            entry to the secondary-information path (the log messages label
            position 4 as fire rating and position 5 as acoustic rating).

    Returns:
        A 5-tuple ``(annotatedimg, doc2, list1, repeated_labels, not_found)``:
        the BGR image of the first merged page, the open merged fitz
        document, a DataFrame with columns ``content``/``id``/``subject``/
        ``color``, the repeated labels of the last processed search entry
        (an empty list if none was processed), and an always-empty list.
    """
    print("mainRun is RUNNING")

    eltype = type(plan)
    print(f"el type beta3 variable plan:: {eltype}")
    len_plan = len(plan)
    print(f"length of the plan's array is: {len_plan}")
    p1_type = type(plan[0])
    print(f"el mawgood fe p[0]: {p1_type}")

    print(f"search array: {searcharray}")

    dfs = extract_tables(schedule)

    # Pre-bound so the final return cannot hit an unbound name when
    # searcharray is empty.
    repeated_labels = []
    pdfs = []
    for p in plan:
        pdf_document = fitz.open("pdf", p)
        try:
            rect = pdf_document[0].rect  # First page; rectangle x0, y0, x1, y1
            width_pdf = rect.width
            height_pdf = rect.height
        finally:
            # The document is opened only to report the page size.
            pdf_document.close()

        print(f"plan width: {width_pdf}")
        print(f"plan height: {height_pdf}")

        # Each search entry annotates the output of the previous one. Starting
        # from the plan itself also makes an empty searcharray pass the plan
        # through unannotated instead of raising.
        current_pdf = p

        for user_input in searcharray:
            secondary_presence = False
            if user_input[4] or user_input[5]:
                secondary_presence = True
                main_info_, secondary_info_ = separate_main_secondary(user_input)
                main_info = [item for item in main_info_ if item]
                secondary_info = [item for item in secondary_info_ if item]
                print("feh secondary information")
                if user_input[4]:
                    print("Fire rate mawgooda")
                if user_input[5]:
                    print("Acoustic Rate mawgooda")
            else:
                print("mafeesh secondary information")

            selected_columns_combined = get_selected_columns_all(dfs, user_input)
            kelma = get_st_op_pattern(selected_columns_combined, user_input)
            col_dict = get_similar_colors_all(selected_columns_combined)
            flattened_list = get_flattened_tuples_list_all(col_dict)
            plan_texts = read_text(p)

            if secondary_presence:
                locations, _not_found = get_word_locations_plan_secondary(
                    flattened_list, plan_texts, main_info, secondary_info
                )
                new_data3 = get_cleaned_data_secondary(locations, main_info, secondary_info)
                repeated_labels = get_repeated_labels(locations)

                widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(
                    new_data3, main_info, secondary_info
                )
                if kelma is not None:
                    cleaned_width = get_cleaned_width(widths)
                    widths = get_widths_bb_format(cleaned_width, kelma)

                # Handling schedules without dimensions (width and height)
                if selected_columns_combined.shape[1] == 2:
                    widths = []

                secondary_printed_clean = get_secondary_tobeprinted_clean(
                    selected_columns_combined, secondary_tobeprinted, secondary_info
                )
                all_print = mix_width_secondary(widths, secondary_printed_clean)

                current_pdf = process_pdf_secondary(
                    current_pdf,
                    "final_output_multiple_input_new2.pdf",
                    new_data3,
                    all_print,
                    main_info,
                    secondary_info,
                )
            else:
                locations, _not_found = get_word_locations_plan(flattened_list, plan_texts)
                new_data = get_cleaned_data(locations)
                repeated_labels = get_repeated_labels(locations)

                widths = get_width_info_tobeprinted(new_data)
                if kelma is not None:
                    cleaned_width = get_cleaned_width(widths)
                    widths = get_widths_bb_format(cleaned_width, kelma)

                # Handling schedules without dimensions (width and height)
                if selected_columns_combined.shape[1] == 2:
                    widths = []

                current_pdf = process_pdf(
                    current_pdf, "final_output_width_trial.pdf", new_data, widths
                )

        pdfs.append(current_pdf)

    merged_pdf = merge_pdf_bytes_list(pdfs)
    print(f"number of pges of merged_pdf is {len(merged_pdf)} and its type is {type(merged_pdf)}")

    # NOTE(review): the per-search "not found" results are discarded and an
    # empty list is returned, as before; confirm whether callers expect the
    # accumulated values.
    not_found = []
    doc2 = fitz.open('pdf', merged_pdf)
    len_doc2 = len(doc2)
    print(f"number of pges of doc2 is {len_doc2} and its type is {type(doc2)}")
    page = doc2[0]
    pix = page.get_pixmap()  # render page to an image
    pl = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    img = np.array(pl)
    annotatedimg = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    list1 = pd.DataFrame(columns=['content', 'id', 'subject', 'color'])

    for page in doc2:
        for annot in page.annots():
            annot_color = annot.colors
            if annot_color is None:
                continue
            # Stroke wins over fill when both exist. An annotation with
            # neither has no colour to report and is skipped; previously the
            # lookup key was unbound or left over from an earlier annotation.
            rgb = annot_color.get('stroke') or annot_color.get('fill')
            if not rgb:
                continue
            x, y, z = int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)
            list1.loc[len(list1)] = [
                annot.info['content'],
                annot.info['id'],
                annot.info['subject'],
                [x, y, z],
            ]
    return annotatedimg, doc2, list1, repeated_labels, not_found