Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| import pandas as pd | |
| import statistics | |
| from statistics import mode | |
| from PIL import Image | |
| import io | |
| import google_sheet_Legend | |
| import pypdfium2 as pdfium | |
| import fitz # PyMuPDF | |
| import os | |
| import random | |
def get_text_from_pdf(input_pdf_path):
    """Extract word-level text entries from an in-memory PDF.

    Args:
        input_pdf_path: Passed to ``fitz.open`` as the second positional
            argument with ``'pdf'`` as the first, i.e. it is used as the
            document *stream* (presumably the PDF's raw bytes, despite the
            parameter name -- TODO confirm against callers).

    Returns:
        The ``page.get_text("words")`` result of the LAST page in the
        document, or an empty list when the document has no pages.
    """
    # Start with an empty result so a page-less document returns [] instead
    # of raising UnboundLocalError at the return statement.
    text_instances = []
    # The context manager closes the document once the words are extracted;
    # the returned value is a plain list and does not depend on the handle.
    with fitz.open('pdf', input_pdf_path) as pdf_document:
        for page in pdf_document:
            # NOTE(review): each iteration overwrites the previous page's
            # words, so only the last page survives. Kept as-is; confirm
            # whether page 0 (used everywhere else in this file) was meant.
            text_instances = page.get_text("words")
    return text_instances
def convert2img(path):
    """Render the first page of a PDF as a BGR pixel array.

    Args:
        path: Input handed unchanged to ``pdfium.PdfDocument``.

    Returns:
        A NumPy array produced by rendering page 0 with pypdfium2 at its
        default settings, converting to a PIL image, and swapping the
        channel order from RGB to BGR with OpenCV.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    rgb_pixels = np.array(first_page.render().to_pil())
    # OpenCV routines in this module expect BGR channel order.
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
def changeWhiteColumns(img):
    """Return a copy of ``img`` with near-white pixels recoloured.

    Pixels whose HSV value is exactly H=0, S=0 and V in [250, 255] are set
    to ``(255, 0, 0)`` (blue when the image is BGR, as the ``COLOR_BGR2HSV``
    conversion implies). The input array is not modified.

    Args:
        img: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2HSV)``.

    Returns:
        The recoloured copy.
    """
    painted = img.copy()
    hsv_pixels = cv2.cvtColor(painted, cv2.COLOR_BGR2HSV)
    lower_white = np.array([0, 0, 250])
    upper_white = np.array([0, 0, 255])
    white_mask = cv2.inRange(hsv_pixels, lower_white, upper_white)
    painted[white_mask > 0] = (255, 0, 0)
    return painted
def changeGrayModify(img):
    """Recolour light-gray pixels of ``img`` in place and return it.

    Pixels whose HSV value is exactly H=0, S=0 and V in [175, 199] are set
    to ``(255, 0, 0)``. Unlike ``changeWhiteColumns`` no copy is taken: the
    caller's array is mutated and the same object is returned.

    Args:
        img: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2HSV)``.

    Returns:
        The same ``img`` object, after modification.
    """
    hsv_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    gray_mask = cv2.inRange(
        hsv_pixels,
        np.array([0, 0, 175]),
        np.array([0, 0, 199]),
    )
    img[gray_mask > 0] = (255, 0, 0)
    return img
def segment_blue(gray_changed):
    """Keep only fully saturated, full-brightness pixels with hue 120-179.

    Args:
        gray_changed: Image array accepted by
            ``cv2.cvtColor(..., COLOR_BGR2HSV)``.

    Returns:
        A copy of the input in which every pixel outside the HSV range
        [120, 255, 255]..[179, 255, 255] is zeroed by ``cv2.bitwise_and``.
    """
    hsv_pixels = cv2.cvtColor(gray_changed, cv2.COLOR_BGR2HSV)
    lower_bound = np.array([120, 255, 255])
    upper_bound = np.array([179, 255, 255])
    keep_mask = cv2.inRange(hsv_pixels, lower_bound, upper_bound)
    return cv2.bitwise_and(gray_changed, gray_changed, mask=keep_mask)
def segment_brown(img):
    """Keep only pixels with hue 0-81 and saturation of at least 9.

    Args:
        img: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2HSV)``.

    Returns:
        A copy of the input in which every pixel outside the HSV range
        [0, 9, 0]..[81, 255, 255] is zeroed by ``cv2.bitwise_and``.
    """
    hsv_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    keep_mask = cv2.inRange(
        hsv_pixels,
        np.array([0, 9, 0]),
        np.array([81, 255, 255]),
    )
    return cv2.bitwise_and(img, img, mask=keep_mask)
def threshold(imgResult3):
    """Binarise a colour image with a light blur followed by Otsu's method.

    Args:
        imgResult3: Image array accepted by ``cv2.GaussianBlur`` and
            ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

    Returns:
        The thresholded image returned by ``cv2.threshold`` with
        ``THRESH_BINARY + THRESH_OTSU`` and a maximum value of 255.
    """
    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
    grayscale = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (chosen_threshold, image); only the image is used.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary
def get_columns_info(outsu4, img):
    """Classify external contours by area into walls and columns.

    Contours with area above ``881.0 * 2`` are treated as walls; contours
    with area strictly between 90 and ``881.0 * 2`` are treated as columns.
    A contour whose area equals ``881.0 * 2`` exactly falls in neither group.

    Args:
        outsu4: Binary image passed to ``cv2.findContours``.
        img: Array whose first two dimensions give the size of the masks.

    Returns:
        A tuple ``(p, mask_clmns, mask_walls)`` where ``p`` is a list of
        ``(x, y)`` integer centroids (from image moments) of the column
        contours, and each mask is a uint8 array filled with 255 in which
        the corresponding contours are drawn filled with 0.
    """
    wall_min_area = 881.0 * 2
    column_min_area = 90

    mask_shape = img.shape[:2]
    mask_clmns = np.full(mask_shape, 255, dtype="uint8")
    mask_walls = np.full(mask_shape, 255, dtype="uint8")

    contours, _ = cv2.findContours(
        image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
    )

    p = []  # centroid of every contour classified as a column
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area > wall_min_area:
            cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
        elif column_min_area < area < wall_min_area:
            moments = cv2.moments(cnt)
            # Compute the centroid for THIS contour only; a zero m00 would
            # otherwise reuse the previous contour's centroid (or hit an
            # undefined name on the first contour).
            if moments['m00'] == 0.0:
                continue
            centroid_x = int(moments['m10'] / moments['m00'])
            centroid_y = int(moments['m01'] / moments['m00'])
            p.append((centroid_x, centroid_y))
            cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
    return p, mask_clmns, mask_walls
def getTextsPoints(x):
    """Compute a midpoint for every text entry and map it to its label.

    For each entry ``h`` the midpoint is
    ``calculate_midpoint(h[1], h[0], h[3], h[2])``, i.e. the first tuple
    component averages ``h[1]``/``h[3]`` and the second averages
    ``h[0]``/``h[2]``. The label is ``h[4]``.

    Args:
        x: Iterable of indexable entries with at least five items
            (presumably PyMuPDF "words" tuples -- verify against caller).

    Returns:
        ``(point_list, pt_clm)``: the midpoints in input order, and a dict
        from midpoint to label. Entries sharing a midpoint keep the label
        of the last one in the dict.
    """
    midpoints = []
    label_by_midpoint = {}
    for entry in x:
        midpoint = calculate_midpoint(entry[1], entry[0], entry[3], entry[2])
        midpoints.append(midpoint)
        label_by_midpoint[midpoint] = entry[4]
    return midpoints, label_by_midpoint
def fix_90_ky_val(pt_clm, derotationMatrix):
    """Re-key a point-to-label dict through a derotation matrix.

    Each key ``ky`` becomes ``fitz.Point(ky[0], ky[1]) * derotationMatrix``
    and is stored as the integer tuple ``(y, x)`` of the transformed point
    (note the swapped order).

    Args:
        pt_clm: Dict whose keys are indexable points and whose values are
            labels.
        derotationMatrix: Matrix multiplied onto each ``fitz.Point``.

    Returns:
        A new dict with transformed keys; when two keys collapse onto the
        same integer tuple the later one wins.
    """
    def _derotate(key):
        moved = fitz.Point(key[0], key[1]) * derotationMatrix
        return (int(moved.y), int(moved.x))

    return {_derotate(key): label for key, label in pt_clm.items()}
def calculate_midpoint(x1,y1,x2,y2):
    """Return the integer midpoint between ``(x1, y1)`` and ``(x2, y2)``.

    Each coordinate is the average of the two inputs, truncated toward zero
    by ``int()``.

    Returns:
        A tuple ``(xm, ym)`` of ints.
    """
    return int((x1 + x2) / 2), int((y1 + y2) / 2)
def getColumnsTypesKeyValue(nearbyy, pt_clm):
    """Look up the label of every point in ``nearbyy``.

    Args:
        nearbyy: Sequence of points used as keys.
        pt_clm: Dict mapping points to labels.

    Returns:
        The labels in the same order as ``nearbyy``.

    Raises:
        KeyError: If a point is missing from ``pt_clm``.
    """
    return [pt_clm[point] for point in nearbyy]
def fix_rotation_90(pc_coordinates, derotationMatrix):
    """Transform a list of points through a derotation matrix.

    Each coordinate ``c`` becomes ``fitz.Point(c[0], c[1]) *
    derotationMatrix`` and is returned as the integer tuple ``(y, x)`` of
    the transformed point (note the swapped order).

    Args:
        pc_coordinates: Iterable of indexable points.
        derotationMatrix: Matrix multiplied onto each ``fitz.Point``.

    Returns:
        List of transformed integer tuples, in input order.
    """
    def _derotate(coordinate):
        moved = fitz.Point(coordinate[0], coordinate[1]) * derotationMatrix
        return (int(moved.y), int(moved.x))

    return [_derotate(coordinate) for coordinate in pc_coordinates]
def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        point1: Two-element iterable ``(x, y)``.
        point2: Two-element iterable ``(x, y)``.

    Returns:
        The distance as computed by ``np.sqrt``.
    """
    (ax, ay), (bx, by) = point1, point2
    squared_sum = (ax - bx) ** 2 + (ay - by) ** 2
    return np.sqrt(squared_sum)
def getNearestText(point_list, p, max_distance=44):
    """Pair each column point with its nearest text point, if close enough.

    Args:
        point_list: Sequence of text points ``(a, b)``.
        p: Sequence of column points ``(a, b)``.
        max_distance: A pair is kept only when the distance between the
            column point and its nearest text point is strictly below this
            value. Defaults to 44, the value previously hard-coded.

    Returns:
        ``(nearbyy, selected_clm_point, txt_clmn)``:
        the matched text points, the matched column points (same order),
        and the ``(text_point, column_point)`` pairs. All three are empty
        when there are no text points or no column points.
    """
    nearbyy = []
    selected_clm_point = []  # column points that found a text label
    txt_clmn = []

    # min() raises ValueError on an empty sequence; a page without any text
    # simply has nothing to match.
    if len(point_list) == 0:
        return nearbyy, selected_clm_point, txt_clmn

    for column_point in p:
        nearest_point = min(
            point_list, key=lambda point: distance(point, column_point)
        )
        if distance(nearest_point, column_point) < max_distance:
            nearbyy.append(nearest_point)
            selected_clm_point.append(column_point)
            txt_clmn.append((nearest_point, column_point))
    return nearbyy, selected_clm_point, txt_clmn
def getColumnsTypes(nearbyy, x):
    """Collect "C"-prefixed labels of entries located at the given points.

    For every point in ``nearbyy`` (outer order) and every entry in ``x``
    (inner order), the entry's label ``tpl[4]`` is collected when
    ``tpl[2]``/``tpl[3]`` equal the point's first/second component and the
    label starts with ``"C"``.

    Args:
        nearbyy: Sequence of two-component points.
        x: Sequence of indexable entries with at least five items.

    Returns:
        List of matching labels, duplicates included.
    """
    return [
        entry[4]
        for target in nearbyy
        for entry in x
        if entry[2] == target[0]
        and entry[3] == target[1]
        and entry[4].startswith("C")
    ]
def generate_legend(found_tuple):
    """Build a frequency table of column labels.

    Args:
        found_tuple: Iterable of hashable labels.

    Returns:
        A ``pandas.DataFrame`` with columns ``'Column Type'`` and
        ``'Count'``, one row per distinct label in first-seen order.
    """
    tally = {}
    for label in found_tuple:
        tally[label] = tally.get(label, 0) + 1
    return pd.DataFrame(tally.items(), columns=['Column Type', 'Count'])
def color_groups(txtpts_ky_vlu):
    """Assign a random RGB colour to every distinct label.

    Args:
        txtpts_ky_vlu: Dict whose values are hashable labels.

    Returns:
        Dict mapping each distinct label to a tuple of three ints drawn
        with ``random.randint(0, 255)``. Colours are random, so two labels
        are not guaranteed to receive different colours.
    """
    palette = {}
    for label in set(txtpts_ky_vlu.values()):
        palette[label] = tuple(random.randint(0, 255) for _ in range(3))
    return palette
def get_drawing_info(txt_clmn,txtpts_ky_vlu,key_colors):
    """Attach the label and its colour to every text/column pair.

    Args:
        txt_clmn: Iterable of ``(text_location, column_location)`` pairs.
        txtpts_ky_vlu: Dict mapping a text location to its label.
        key_colors: Dict mapping a label to its colour.

    Returns:
        List of ``(text_location, column_location, word, color)`` tuples in
        input order.

    Raises:
        KeyError: If a text location or label is missing from its dict.
    """
    return [
        (
            text_location,
            column_location,
            txtpts_ky_vlu[text_location],
            key_colors[txtpts_ky_vlu[text_location]],
        )
        for text_location, column_location in txt_clmn
    ]
def add_annotations_to_pdf(image, pdf_name, huge_list_clmn_clr_loc):
    """Draw one circle annotation per detected column on page 0 of a PDF.

    Args:
        image: Unused; kept so existing callers keep working.
        pdf_name: Passed to ``fitz.open`` as the document stream with
            ``'pdf'`` as the file type (presumably the PDF's raw bytes --
            TODO confirm against callers).
        huge_list_clmn_clr_loc: Iterable of
            ``(text_location, column_location, word, color)`` tuples where
            ``column_location`` is an ``(x, y)`` pair and ``color`` holds
            three 0-255 channel values.

    Returns:
        The open ``fitz`` document with the annotations added. The caller
        owns the handle.
    """
    marker_radius = 10  # half the side of the square enclosing each circle

    pdf_document = fitz.open('pdf', pdf_name)
    page = pdf_document[0]
    rotationOld = page.rotation
    derotationMatrix = page.derotation_matrix
    # Annotate in the unrotated coordinate system; the original rotation is
    # restored after all annotations are placed.
    if rotationOld != 0:
        page.set_rotation(0)

    for _text_loc, column_loc, word, clr in huge_list_clmn_clr_loc:
        x, y = column_loc
        # PyMuPDF colours are floats in [0, 1].
        stroke = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
        centre = fitz.Point(x, y) * derotationMatrix
        annot = page.add_circle_annot(
            fitz.Rect(
                centre.x - marker_radius,
                centre.y - marker_radius,
                centre.x + marker_radius,
                centre.y + marker_radius,
            )
        )
        annot.set_colors(stroke=stroke, fill=(1, 1, 1))  # white fill
        annot.set_border(width=2)
        annot.set_opacity(1)
        # set_info takes keyword fields (or an info dict). The previous
        # positional calls such as set_info("subject", "Count") only ever
        # wrote the *content* field, so subject and title stayed empty.
        # Content stays equal to the label, as before; subject "Count" is
        # what Bluebeam uses for Count markups.
        annot.set_info(content=word, title=word, subject="Count")
        annot.update()

    page.set_rotation(rotationOld)
    return pdf_document
def mainfun(pdf_name,pdfpath,planname):
    """Detect columns on a plan PDF, label them, and annotate the PDF.

    Steps: read page-0 rotation, extract text midpoints, render page 0 to
    an image, segment column blobs (brown first, blue as fallback), match
    each column to its nearest text label, build the legend, add circle
    annotations, publish the legend via ``google_sheet_Legend``, and list
    the annotations found in the resulting document.

    Args:
        pdf_name: PDF input passed as the stream argument to ``fitz.open``
            and to the helper functions (presumably raw bytes -- TODO
            confirm against callers).
        pdfpath: Forwarded unchanged to
            ``google_sheet_Legend.legendGoogleSheets``.
        planname: Forwarded unchanged to
            ``google_sheet_Legend.legendGoogleSheets``.

    Returns:
        ``(annotatedimg, pdf_document, spreadsheet_url, list1, legend)``:
        the BGR rendering of the annotated page 0, the open annotated
        ``fitz`` document, the spreadsheet URL returned by the legend
        helper, a DataFrame with columns ``content``/``id``/``subject``/
        ``color`` describing the annotations, and the legend DataFrame.
    """
    # Only the rotation data is needed from this handle, so release it
    # right away instead of leaking it for the rest of the call.
    with fitz.open('pdf', pdf_name) as probe_document:
        first_page = probe_document[0]
        rotation = first_page.rotation
        derotationMatrix = first_page.derotation_matrix

    texts_from_pdf = get_text_from_pdf(pdf_name)
    text_points, txtpts_ky_vlu = getTextsPoints(texts_from_pdf)
    # Only a 90-degree page rotation is compensated; other rotations are
    # used as-is.
    if rotation == 90:
        text_points = fix_rotation_90(text_points, derotationMatrix)
        txtpts_ky_vlu = fix_90_ky_val(txtpts_ky_vlu, derotationMatrix)

    img = convert2img(pdf_name)

    # First attempt: brown columns.
    column_points, _, _ = get_columns_info(threshold(segment_brown(img)), img)
    key_colors = color_groups(txtpts_ky_vlu)

    # Ten or fewer brown candidates is taken to mean the plan draws its
    # columns in blue, so redo the detection on the blue segmentation.
    if len(column_points) <= 10:
        img_blue = changeGrayModify(img)  # NOTE: also modifies img in place
        column_points, _, _ = get_columns_info(
            threshold(segment_blue(img_blue)), img
        )

    # The matching/legend pipeline is identical for both colour schemes.
    nearby, _, txt_clmn = getNearestText(text_points, column_points)
    columns_types_v = getColumnsTypesKeyValue(nearby, txtpts_ky_vlu)
    legend = generate_legend(columns_types_v)
    huge_list_clmn_clr_loc = get_drawing_info(txt_clmn, txtpts_ky_vlu, key_colors)

    pdf_document = add_annotations_to_pdf(img, pdf_name, huge_list_clmn_clr_loc)

    # Render the annotated first page and convert it to BGR for OpenCV.
    pix = pdf_document[0].get_pixmap()
    rendered = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    annotatedimg = cv2.cvtColor(np.array(rendered), cv2.COLOR_RGB2BGR)

    legend = legend.fillna(' ')
    # Only the spreadsheet URL of the five returned values is used here.
    _, _, _, spreadsheet_url, _ = google_sheet_Legend.legendGoogleSheets(
        legend, planname, pdfpath
    )

    list1 = pd.DataFrame(columns=['content', 'id', 'subject', 'color'])
    for page in pdf_document:
        for annot in page.annots():
            annot_color = annot.colors
            if annot_color is None:
                continue
            stroke_color = annot_color.get('stroke')  # border colour
            print('strokeee',stroke_color)
            # One row per annotation that reports a colour dict.
            # NOTE(review): the colour column is always [255, 0, 0] rather
            # than the annotation's own stroke colour -- confirm intended.
            list1.loc[len(list1)] = [
                annot.info['content'],
                annot.info['id'],
                annot.info['subject'],
                [255, 0, 0],
            ]
    print('list1',list1)
    return annotatedimg, pdf_document, spreadsheet_url, list1, legend