# MeasurementTesting
#
# Sleeping
#
# File size: 9,343 Bytes

import cv2
import numpy as np
import pandas as pd
import statistics
from statistics import mode
from PIL import Image
import io
import google_sheet_Legend
import pypdfium2 as pdfium
import fitz  # PyMuPDF
import os

def get_text_from_pdf(input_pdf_path):
    """Extract word tuples from a PDF with PyMuPDF.

    Args:
        input_pdf_path: Value handed to ``fitz.open('pdf', ...)`` as the
            second positional argument.
            NOTE(review): despite the name this looks like in-memory PDF
            content rather than a filesystem path -- confirm with callers.

    Returns:
        The ``page.get_text("words")`` result of the *last* page of the
        document (every page overwrites the previous page's result), or an
        empty list when the document has no pages.
    """
    # Start with an empty result so a zero-page document returns [] instead
    # of failing on an unbound local.
    text_instances = []
    pdf_document = fitz.open('pdf', input_pdf_path)
    try:
        for page in pdf_document:
            text_instances = page.get_text("words")
            # Kept from the original implementation. The words were already
            # extracted above and the document is never saved, so this call
            # does not change the returned value.
            page.apply_redactions()
    finally:
        # Release the document deterministically instead of relying on
        # garbage collection.
        pdf_document.close()
    return text_instances

def convert2img(path):
    """Render page 0 of a PDF with pypdfium2 and return it as a BGR array.

    Args:
        path: Input accepted by ``pdfium.PdfDocument``.

    Returns:
        The rendered first page as a NumPy array, converted from RGB to BGR
        channel order with OpenCV.
    """
    document = pdfium.PdfDocument(path)
    first_page = document.get_page(0)
    rendered = first_page.render().to_pil()
    rgb_pixels = np.array(rendered)
    # OpenCV routines in this module expect BGR ordering.
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

def changeWhiteColumns(img):
    """Return a copy of ``img`` with near-white pixels painted (255, 0, 0).

    A pixel is selected when, after BGR -> HSV conversion, it has H == 0,
    S == 0 and V in [250, 255]. The input image itself is left untouched.
    """
    painted = img.copy()
    hsv_pixels = cv2.cvtColor(painted, cv2.COLOR_BGR2HSV)
    lower_white = np.array([0, 0, 250])
    upper_white = np.array([0, 0, 255])
    white_mask = cv2.inRange(hsv_pixels, lower_white, upper_white)
    # Overwrite every selected pixel with the marker colour.
    painted[white_mask > 0] = (255, 0, 0)
    return painted

def changeGrayModify(img):
    """Paint light-gray pixels of ``img`` with (255, 0, 0), in place.

    A pixel is selected when, after BGR -> HSV conversion, it has H == 0,
    S == 0 and V in [175, 199].

    Note that ``img`` itself is modified; the same array is returned.
    """
    hsv_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_gray = np.array([0, 0, 175])
    upper_gray = np.array([0, 0, 199])
    gray_mask = cv2.inRange(hsv_pixels, lower_gray, upper_gray)
    # In-place overwrite: callers holding a reference to ``img`` see the change.
    img[gray_mask > 0] = (255, 0, 0)
    return img

def segment_blue(gray_changed):
    """Keep only pixels whose HSV value is in [120..179, 255, 255].

    Every pixel outside that range is zeroed in the returned image; the
    input image is not modified.
    """
    hsv_pixels = cv2.cvtColor(gray_changed, cv2.COLOR_BGR2HSV)
    lower_bound = np.array([120, 255, 255])
    upper_bound = np.array([179, 255, 255])
    selection = cv2.inRange(hsv_pixels, lower_bound, upper_bound)
    # AND-ing the image with itself under the mask blanks unselected pixels.
    return cv2.bitwise_and(gray_changed, gray_changed, mask=selection)

def segment_brown(img):
    """Keep only pixels with HSV between (0, 9, 0) and (81, 255, 255).

    Every pixel outside that range is zeroed in the returned image; the
    input image is not modified.
    """
    hsv_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_bound = np.array([0, 9, 0])
    upper_bound = np.array([81, 255, 255])
    selection = cv2.inRange(hsv_pixels, lower_bound, upper_bound)
    # AND-ing the image with itself under the mask blanks unselected pixels.
    return cv2.bitwise_and(img, img, mask=selection)

def threshold(imgResult3):
    """Binarize a BGR image: Gaussian blur, grayscale, then Otsu threshold.

    Args:
        imgResult3: BGR image to binarize.

    Returns:
        The single-channel binary image produced by
        ``THRESH_BINARY + THRESH_OTSU``.
    """
    blurred = cv2.GaussianBlur(imgResult3, (3, 3), 9)
    grayscale = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)
    # cv2.threshold returns (chosen_threshold, image); only the image is used.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binary

def get_columns_info(outsu4, img, min_column_area=90, wall_area=881.0 * 2):
    """Classify external contours of a binary image by area.

    Contours with an area above ``wall_area`` are drawn (filled with 0) on
    the wall mask. Contours with an area strictly between
    ``min_column_area`` and ``wall_area`` are drawn on the column mask and
    their centroid is recorded. All other contours are ignored.

    Args:
        outsu4: Binary image passed to ``cv2.findContours``.
        img: Image whose first two shape dimensions size the masks.
        min_column_area: Exclusive lower area bound for a column contour.
        wall_area: Exclusive upper area bound for a column contour and
            exclusive lower bound for a wall contour.

    Returns:
        ``(points, mask_clmns, mask_walls)`` where ``points`` is a list of
        integer ``(x, y)`` centroids and both masks are ``uint8`` arrays
        initialised to 255 with the matching contours filled with 0.
    """
    mask_shape = img.shape[:2]
    mask_clmns = np.full(mask_shape, 255, dtype="uint8")
    mask_walls = np.full(mask_shape, 255, dtype="uint8")
    contours, _ = cv2.findContours(
        image=outsu4, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_NONE
    )

    points = []  # centroid of every contour classified as a column
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area > wall_area:
            cv2.drawContours(mask_walls, [cnt], -1, 0, -1)
        elif min_column_area < area < wall_area:
            moments = cv2.moments(cnt)
            if moments['m00'] == 0.0:
                # No centroid can be computed; skip instead of reusing the
                # centroid of a previously visited contour.
                continue
            centroid_x = int(moments['m10'] / moments['m00'])
            centroid_y = int(moments['m01'] / moments['m00'])
            points.append((centroid_x, centroid_y))
            cv2.drawContours(mask_clmns, [cnt], -1, 0, -1)
    return points, mask_clmns, mask_walls

def getTextsPoints(x):
    """Compute a midpoint for every word entry and map it to the word text.

    Args:
        x: Iterable of indexable entries where indices 0-3 are coordinates
            and index 4 is the text (presumably PyMuPDF "words" tuples
            ``(x0, y0, x1, y1, word, ...)`` -- verify against the caller).

    Returns:
        ``(point_list, pt_clm)``: the list of midpoints in input order, and
        a dict from midpoint to text. The midpoint is built from indices
        ``(1, 0, 3, 2)``, i.e. with the first and second coordinate of each
        corner swapped. If two entries share a midpoint the later text wins
        in the dict, while the list keeps both.
    """
    point_list = []
    pt_clm = {}
    for word in x:
        midpoint = calculate_midpoint(word[1], word[0], word[3], word[2])
        point_list.append(midpoint)
        pt_clm[midpoint] = word[4]
    return point_list, pt_clm

def fix_90_ky_val(pt_clm, derotationMatrix):
    """Re-key a point -> text mapping through a derotation matrix.

    Each key ``(a, b)`` is turned into ``fitz.Point(a, b)``, multiplied by
    ``derotationMatrix``, and stored under ``(int(result.y), int(result.x))``
    (note the swapped order). Values are carried over unchanged.

    Returns:
        A new dict; ``pt_clm`` is not modified.
    """
    derotated = {}
    for key, label in pt_clm.items():
        moved = fitz.Point(key[0], key[1]) * derotationMatrix
        derotated[(int(moved.y), int(moved.x))] = label
    return derotated

def calculate_midpoint(x1, y1, x2, y2):
    """Return the midpoint of (x1, y1) and (x2, y2) as an integer tuple.

    Each coordinate is averaged and then truncated toward zero by ``int``.
    """
    return (int((x1 + x2) / 2), int((y1 + y2) / 2))

def getColumnsTypesKeyValue(nearbyy, pt_clm):
    """Look up the text stored for each point in ``nearbyy``.

    Args:
        nearbyy: Sequence of points used as keys.
        pt_clm: Mapping from point to text.

    Returns:
        A list with ``pt_clm[point]`` for every point, in order.

    Raises:
        KeyError: If a point is not a key of ``pt_clm``.
    """
    return [pt_clm[point] for point in nearbyy]

def fix_rotation_90(pc_coordinates, derotationMatrix):
    """Apply a derotation matrix to a sequence of points.

    Each ``(a, b)`` is turned into ``fitz.Point(a, b)``, multiplied by
    ``derotationMatrix``, and emitted as ``(int(result.y), int(result.x))``
    (note the swapped order).

    Returns:
        A new list of integer tuples in input order.
    """
    derotated = (
        fitz.Point(coordinate[0], coordinate[1]) * derotationMatrix
        for coordinate in pc_coordinates
    )
    return [(int(point.y), int(point.x)) for point in derotated]

def distance(point1, point2):
    """Return the Euclidean distance between two 2-D points."""
    (ax, ay), (bx, by) = point1, point2
    squared = (ax - bx) ** 2 + (ay - by) ** 2
    return np.sqrt(squared)

def getNearestText(point_list, p, max_distance=44):
    """Pair each column point with its nearest text point, if close enough.

    Args:
        point_list: Sequence of 2-D text points.
        p: Sequence of 2-D column points.
        max_distance: A pair is kept only when the Euclidean distance
            between the column point and its nearest text point is strictly
            below this value.

    Returns:
        ``(nearbyy, selected_clm_point)``: two parallel lists holding, for
        every accepted pair, the text point (the original element of
        ``point_list``) and the column point. Both are empty when either
        input is empty.
    """
    nearbyy = []
    selected_clm_point = []  # column points that received a text match
    # Without any text point there is nothing to match; the previous
    # implementation raised ValueError from min() in this situation.
    if len(point_list) == 0:
        return nearbyy, selected_clm_point

    text_xy = np.asarray(point_list, dtype=float)
    for column_point in p:
        deltas = text_xy - np.asarray(column_point, dtype=float)
        distances = np.sqrt((deltas ** 2).sum(axis=1))
        # argmin returns the first minimum, matching min()'s tie-breaking.
        nearest_index = int(distances.argmin())
        if distances[nearest_index] < max_distance:
            nearbyy.append(point_list[nearest_index])
            selected_clm_point.append(column_point)
    return nearbyy, selected_clm_point


def getColumnsTypes(nearbyy, x):
    """Collect the texts starting with "C" whose entry matches a nearby point.

    Args:
        nearbyy: Sequence of 2-D points.
        x: Re-iterable collection of indexable entries; an entry matches a
            point when its indices 2 and 3 equal the point's two coordinates.

    Returns:
        The index-4 text of every matching entry whose text starts with
        ``"C"``, ordered by point and then by entry.
    """
    return [
        entry[4]
        for point in nearbyy
        for entry in x
        if entry[2] == point[0]
        and entry[3] == point[1]
        and entry[4].startswith("C")
    ]

def generate_legend(found_tuple):
    """Build a frequency table of the given column-type labels.

    Args:
        found_tuple: Iterable of hashable labels.

    Returns:
        A DataFrame with columns ``'Column Type'`` and ``'Count'``, one row
        per distinct label in order of first appearance.
    """
    occurrences = {}
    for label in found_tuple:
        occurrences[label] = occurrences.get(label, 0) + 1
    return pd.DataFrame(occurrences.items(), columns=['Column Type', 'Count'])

def add_annotations_to_pdf(image, pdf_name, slctd_clm, columns_types_v):
    """Add one text (sticky-note) annotation per selected column to page 0.

    Args:
        image: Unused; kept so existing callers keep working.
        pdf_name: Value handed to ``fitz.open('pdf', ...)`` as the second
            positional argument.
        slctd_clm: Sequence of ``(x, y)`` column points.
        columns_types_v: Annotation texts, indexed in parallel with
            ``slctd_clm``.

    Returns:
        The opened ``fitz`` document with the annotations added. The caller
        owns it and is responsible for saving/closing it.

    Raises:
        IndexError: If ``columns_types_v`` is shorter than ``slctd_clm``.
    """
    pdf_document = fitz.open('pdf', pdf_name)
    page = pdf_document[0]
    original_rotation = page.rotation
    # Must be read before the rotation is reset below, while it still
    # reflects the page's original rotation.
    derotationMatrix = page.derotation_matrix
    if original_rotation != 0:
        page.set_rotation(0)

    for index, (x, y) in enumerate(slctd_clm):
        anchor = fitz.Point(x, y) * derotationMatrix
        text = columns_types_v[index]
        annot = page.add_text_annot((anchor.x, anchor.y), text)
        annot.set_border(width=0.2, dashes=(1, 2))  # optional border styling
        annot.set_colors(stroke=(1, 0, 0), fill=None)  # red stroke
        annot.update()

    # Restore the rotation the page had when it was opened.
    page.set_rotation(original_rotation)
    return pdf_document

def mainfun(pdf_name, pdfpath, planname):
    """Detect columns on page 0 of a PDF, label them and export a legend.

    Steps: extract words, render page 0, segment columns (first with the
    "brown" HSV range, falling back to the "blue" pipeline when 10 or fewer
    columns are found), match each column to its nearest text, annotate the
    PDF, publish the legend via ``google_sheet_Legend.legendGoogleSheets``
    and collect the annotations into a DataFrame.

    Args:
        pdf_name: Value handed to ``fitz.open('pdf', ...)`` and to the
            other PDF helpers of this module.
        pdfpath: Forwarded to ``google_sheet_Legend.legendGoogleSheets``.
        planname: Forwarded to ``google_sheet_Legend.legendGoogleSheets``.

    Returns:
        ``(annotatedimg, pdf_document, spreadsheet_url, list1, legend)``:
        the BGR rendering of the annotated page 0, the annotated ``fitz``
        document, the spreadsheet URL returned by the legend helper, a
        DataFrame of annotations (``content``, ``id``, ``subject``,
        ``color``) and the legend DataFrame.
    """
    # Only the rotation metadata of page 0 is needed from this handle, so
    # close it as soon as the values are read.
    source_document = fitz.open('pdf', pdf_name)
    try:
        first_page = source_document[0]
        rotation = first_page.rotation
        derotationMatrix = first_page.derotation_matrix
    finally:
        source_document.close()

    texts_from_pdf = get_text_from_pdf(pdf_name)
    text_points, txtpts_ky_vlu = getTextsPoints(texts_from_pdf)
    # NOTE(review): only a rotation of exactly 90 is compensated; other
    # non-zero rotations are left as-is -- confirm this is intended.
    if rotation == 90:
        text_points = fix_rotation_90(text_points, derotationMatrix)
        txtpts_ky_vlu = fix_90_ky_val(txtpts_ky_vlu, derotationMatrix)

    img = convert2img(pdf_name)
    # First attempt: brown columns.
    outsu = threshold(segment_brown(img))
    column_points, mask_clmns, mask_walls = get_columns_info(outsu, img)

    if len(column_points) <= 10:
        # Too few brown columns: fall back to the blue-column pipeline.
        # changeGrayModify recolours ``img`` in place.
        img_blue = changeGrayModify(img)
        outsu = threshold(segment_blue(img_blue))
        column_points, mask_clmns, mask_walls = get_columns_info(outsu, img)

    # Matching and legend generation are identical for both pipelines.
    nearby, slctd_clm = getNearestText(text_points, column_points)
    columns_types_v = getColumnsTypesKeyValue(nearby, txtpts_ky_vlu)
    legend = generate_legend(columns_types_v)

    pdf_document = add_annotations_to_pdf(img, pdf_name, slctd_clm, columns_types_v)
    page = pdf_document[0]
    pix = page.get_pixmap()  # render page to an image
    rendered = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    annotatedimg = cv2.cvtColor(np.array(rendered), cv2.COLOR_RGB2BGR)

    legend = legend.fillna(' ')
    # Only the spreadsheet URL of the five returned values is used here.
    _, _, _, spreadsheet_url, _ = google_sheet_Legend.legendGoogleSheets(
        legend, planname, pdfpath
    )

    list1 = pd.DataFrame(columns=['content', 'id', 'subject', 'color'])
    for page in pdf_document:
        for annot in page.annots():
            annot_color = annot.colors
            if annot_color is not None:
                stroke_color = annot_color.get('stroke')  # border color
                print('strokeee', stroke_color)
                # The recorded colour is always red, independent of the
                # stroke colour read above.
                list1.loc[len(list1)] = [
                    annot.info['content'],
                    annot.info['id'],
                    annot.info['subject'],
                    [255, 0, 0],
                ]

    print('list1', list1)
    return annotatedimg, pdf_document, spreadsheet_url, list1, legend

# Disabled legacy single-argument variant of mainfun, kept as a module-level
# string literal so it is never executed. As written it references names
# (`rotation`, `pc_coordinates`) that are not defined inside it.
'''def mainfun(plan):
  texts_from_pdf = get_text_from_pdf(plan)
  img = convert2img(plan)
  imgResult = segment_brown(img)
  outsu = threshold(imgResult)
  column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
  if len(column_points) > 10:
    # BROWN COLUMNS
    text_points = getTextsPoints(texts_from_pdf)
    nearby = getNearestText(text_points, column_points)
    if rotation != 0:
      if rotation ==90:
        nearby = fix_rotation_90(pc_coordinates)
    columns_types = getColumnsTypes(nearby, texts_from_pdf)
    legend = generate_legend(columns_types)
  else:
    # BLUE COLUMNS
    img_blue = changeGrayModify(img)
    imgResult = segment_blue(img_blue)
    outsu = threshold(imgResult)
    column_points,mask_clmns, mask_walls = get_columns_info(outsu, img)
    text_points = getTextsPoints(texts_from_pdf)
    nearby = getNearestText(text_points, column_points)
    if rotation != 0:
      if rotation ==90:
        nearby = fix_rotation_90(pc_coordinates)
    columns_types = getColumnsTypes(nearby, texts_from_pdf)
    legend = generate_legend(columns_types)
  return legend'''