MeasurementTesting

Sleeping

File size: 38,655 Bytes

# -*- coding: utf-8 -*-
"""Google Sheet to XML.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1T-b1N8Gq6wwbBurODzJIdhRQNeRbyBnz
"""

from googleapiclient.discovery import build
from google.oauth2 import service_account
import pygsheets
import ast
import re
import pandas as pd
from google.oauth2 import service_account
from googleapiclient.discovery import build
import pygsheets
import tsadropboxretrieval
import requests
import fitz
import numpy as np
from PyPDF2 import PdfReader, PdfWriter
from io import BytesIO
from PyPDF2.generic import NameObject, DictionaryObject, FloatObject, TextStringObject
import dropbox


def authorizeLegend():
  SCOPES = [
  'https://www.googleapis.com/auth/spreadsheets',
  'https://www.googleapis.com/auth/drive',
  'https://www.googleapis.com/auth/drive.metadata'
  ]
  credentials = service_account.Credentials.from_service_account_file('credentials.json', scopes=SCOPES)
  spreadsheet_service = build('sheets', 'v4', credentials=credentials)
  drive_service = build('drive', 'v3', credentials=credentials)
  gc = pygsheets.authorize(custom_credentials=credentials, client_secret='credentials.json')
  return spreadsheet_service,drive_service,gc

#########################
global gc
global pdfpath0
global pdftitle
global worksheetw
spreadsheet_service,drive_service,gc=authorizeLegend()
#########################
def create_new_sheet(path):
    spreadsheet_details = {
        'properties': {
            'title': path
        }
    }
    sheet = spreadsheet_service.spreadsheets().create(body=spreadsheet_details, fields='spreadsheetId').execute()
    spreadsheetId = sheet.get('spreadsheetId')
    permission1 = {
        'type': 'anyone',
        'role': 'writer',
    }
    drive_service.permissions().create(fileId=spreadsheetId, body=permission1, supportsAllDrives=True).execute()
    return spreadsheetId

#########################
def update_sheet(spreadsheetId, SimilarAreaDictionary, pdfpath):
    ws = gc.open_by_key(spreadsheetId)
    global pdfpath0
    pdfpath0=pdfpath
    worksheet = ws.worksheet(0)
    worksheet.title = 'Legend and data created'
    worksheet.clear()
    ws.create_developer_metadata('path', pdfpath)
    splittedpdfpath = re.split(r'[`\-=~!@#$%^&*()_+\[\]{};\'\\:"|<,/<>?]', pdfpath)
    if splittedpdfpath[-2].startswith('2.2') or splittedpdfpath[-2].startswith('2.1'):
        worksheet.set_dataframe(start='A1', df=SimilarAreaDictionary)
    # if splittedpdfpath[-2].startswith('2.8'):
    #     DoorsLegend(SimilarAreaDictionary,spreadsheetId)
    #     return
    
    else:
        top_header_format = [
            {'mergeCells': {  # areas
                'mergeType': 'MERGE_ROWS',
                'range': {
                    'sheetId': '0',
                    'startRowIndex': 1,
                    'endRowIndex': 2,
                    'startColumnIndex': 3,
                    'endColumnIndex': 5
                }
            }},
            {'mergeCells': {  # perimeters
                'mergeType': 'MERGE_ROWS',
                'range': {
                    'sheetId': '0',
                    'startRowIndex': 1,
                    'endRowIndex': 2,
                    'startColumnIndex': 5,
                    'endColumnIndex': 7
                }
            }},
            {'mergeCells': {  # lengths
                'mergeType': 'MERGE_ROWS',
                'range': {
                    'sheetId': '0',
                    'startRowIndex': 1,
                    'endRowIndex': 2,
                    'startColumnIndex': 7,
                    'endColumnIndex': 9
                }
            }},
            {'mergeCells': {  # legend and data created
                'mergeType': 'MERGE_ROWS',
                'range': {
                    'sheetId': '0',
                    'startRowIndex': 0,
                    'endRowIndex': 1,
                    'startColumnIndex': 0,
                    'endColumnIndex': 11
                }
            }}
        ]
        spreadsheet_service.spreadsheets().batchUpdate(spreadsheetId=spreadsheetId, body={'requests': top_header_format}).execute()
        worksheet.cell((1, 1)).value = 'Legend and Data Created'
        worksheet.cell((2, 1)).value = 'Guess'
        worksheet.cell((2, 2)).value = 'Color'
        worksheet.cell((2, 3)).value = 'Count'
        worksheet.cell((2, 4)).value = 'Areas'
        worksheet.cell((2, 6)).value = 'Perimeters'
        worksheet.cell((2, 8)).value = 'Lengths'
        worksheet.cell((2, 10)).value = 'Texts'
        worksheet.cell((2, 11)).value = 'Comments'
        second_row_data = ['Nr', 'm2', 'Total', 'm', 'Total', 'm', 'Total']
        worksheet.update_row(3, second_row_data, col_offset=2)
        if len(list(SimilarAreaDictionary['Guess']))>0:
          worksheet.update_col(1, list(SimilarAreaDictionary['Guess']), row_offset=3)
          worksheet.update_col(3, list(SimilarAreaDictionary['Occurences']), row_offset=3)
          worksheet.update_col(4, list(SimilarAreaDictionary['Area']), row_offset=3)
          worksheet.update_col(5, list(SimilarAreaDictionary['Total Area']), row_offset=3)
          worksheet.update_col(6, list(SimilarAreaDictionary['Perimeter']), row_offset=3)
          worksheet.update_col(7, list(SimilarAreaDictionary['Total Perimeter']), row_offset=3)
          worksheet.update_col(8, list(SimilarAreaDictionary['Length']), row_offset=3)
          worksheet.update_col(9, list(SimilarAreaDictionary['Total Length']), row_offset=3)
          worksheet.update_col(10, list(SimilarAreaDictionary['Texts']), row_offset=3)
          worksheet.update_col(11, list(SimilarAreaDictionary['Comments']), row_offset=3)
        
        if splittedpdfpath[-2].startswith('1.0'):
            colorsUsed = [
                [SimilarAreaDictionary['R'].iloc[i], SimilarAreaDictionary['G'].iloc[i], SimilarAreaDictionary['B'].iloc[i]]
                for i in range(len(SimilarAreaDictionary))
            ]
        elif splittedpdfpath[-2].startswith('3.2') or splittedpdfpath[-2].startswith('3.3') or splittedpdfpath[-2].startswith('2.7'):
            colorsUsed = list(SimilarAreaDictionary['Color'])
        rowsLen = len(SimilarAreaDictionary.values.tolist())
        lastcell = worksheet.cell((rowsLen + 2, 1))
        lastcellNotation = str(lastcell.address.label)
        columnsLen = len(SimilarAreaDictionary.columns.values.tolist())
        lastUsedCol = columnsLen + 1
        worksheet.adjust_column_width(start=2, end=3)
        worksheet.adjust_column_width(start=10, end=10)
        worksheet.adjust_column_width(start=4, end=9, pixel_size=60)
        startrow = 3
        endColindex = 11
        endrow = 3
        sheetId = '0'
        batch_requests = []
        for i in range(len(colorsUsed)):
            r, g, b = colorsUsed[i]
            batch_requests.append({
                "updateCells": {
                    "range": {
                        "sheetId": sheetId,
                        "startRowIndex": i + startrow,
                        "startColumnIndex": 1,
                    },
                    "rows": [
                        {
                            "values": [
                                {
                                    "userEnteredFormat": {
                                        "backgroundColor": {
                                            "red": r / 255,
                                            "green": g / 255,
                                            "blue": b / 255,
                                            "alpha": 0.4,
                                        }
                                    }
                                }
                            ]
                        }
                    ],
                    "fields": "userEnteredFormat.backgroundColor",
                }
            })
        batch_requests.append({
            "updateBorders": {
                "range": {
                    "sheetId": sheetId,
                    "startRowIndex": 0,
                    "endRowIndex": len(SimilarAreaDictionary) + endrow,
                    "startColumnIndex": 0,
                    "endColumnIndex": endColindex
                },
                "top": {
                    "style": "SOLID",
                    "width": 2,
                    "color": {
                        "red": 0.0,
                        "green": 0.0,
                        "blue": 0.0
                    }
                },
                "bottom": {
                    "style": "SOLID",
                    "width": 2,
                    "color": {
                        "red": 0.0,
                        "green": 0.0,
                        "blue": 0.0
                    }
                },
                "left": {
                    "style": "SOLID",
                    "width": 2,
                    "color": {
                        "red": 0.0,
                        "green": 0.0,
                        "blue": 0.0
                    }
                },
                "right": {
                    "style": "SOLID",
                    "width": 2,
                    "color": {
                        "red": 0.0,
                        "green": 0.0,
                        "blue": 0.0
                    }
                },
                "innerHorizontal": {
                    "style": "SOLID",
                    "width": 2,
                    "color": {
                        "red": 0.0,
                        "green": 0.0,
                        "blue": 0.0
                    }
                },
                "innerVertical": {
                    "style": "SOLID",
                    "width": 2,
                    "color": {
                        "red": 0.0,
                        "green": 0.0,
                        "blue": 0.0
                    }
                }
            }
        })
        spreadsheet_service.spreadsheets().batchUpdate(spreadsheetId=spreadsheetId, body={'requests': batch_requests}).execute()
   
    model_cell = worksheet.cell('A1')
    model_cell.set_text_format('bold', True)
    model_cell.set_horizontal_alignment(pygsheets.custom_types.HorizontalAlignment.CENTER)
    model_cell.color = (213 / 255, 219 / 255, 255 / 255)
    pygsheets.DataRange('A2', 'K2', worksheet=worksheet).apply_format(model_cell)

#########################
def legendGoogleSheets(SimilarAreaDictionary, path, pdfpath, spreadsheetId=0):
    titles = gc.spreadsheet_titles()
    if path in titles:
        ws = gc.open(path)
        global pdftitle
        pdftitle=path
        spreadsheetId = ws.id
    else:
        spreadsheetId = create_new_sheet(path)
        ws=gc.open_by_key(spreadsheetId)
    global worksheetw
    worksheetw = ws.worksheet(0) #legend
    update_sheet(spreadsheetId, SimilarAreaDictionary, pdfpath)
    spreadsheet_url = f"https://docs.google.com/spreadsheets/d/{spreadsheetId}"
    drive_service.permissions().update(transferOwnership=True, fileId=spreadsheetId, permissionId='11OfoB4Z6wOVII8mYmbnCbbqTQs7rYA65')
    
    namepathArr = [path, spreadsheetId, ws.get_developer_metadata('path')[0].value]
    return gc, spreadsheet_service, spreadsheetId, spreadsheet_url, namepathArr

#########################
def mapnametoLegend(McTName,colorsused,pdflink): 

  sectionKey = McTName.pop()
  key=sectionKey[0]
  section=sectionKey[1]

  # spreadsheet_service,drive_service,gc=authorizeLegend()
  spreadsheet_key =str(key)  # Please set the Spreadsheet ID.
  
  ws = gc.open_by_key(spreadsheet_key)
  
  # guessednamesfinal=getguessnames(gc,ws)
  sheetnames=[]
  unit=''
  # ws.add_worksheet("Summary")  # Please set the new sheet name.
  for i in ws._sheet_list:
    # print(i)
    sheetnames.append(i.title)
    # print(i.index)
  if 'XML Export Summary' in sheetnames:
    worksheetS = ws.worksheet_by_title('XML Export Summary')
  else:
    ws.add_worksheet("XML Export Summary")  # Please set the new sheet name.
  global worksheetw
  worksheetw = ws.worksheet(0) #legend
  worksheetS = ws.worksheet_by_title('XML Export Summary')
  summaryId= ws[1].id
  
  worksheetS.clear()

  countnames=0
  row0=['MC_T Name','Qty','Unit']
  worksheetS.update_row(1,row0)
  
  allrows=[]
  for i in range(len(McTName)):
    allgbnames=''
    item=''

    
    # firstpart= re.split(r'[`\-=~!@#$%^&*()_+\[\]{};\'\\:"|<,./<>?]', McTName[i][0])
    # print('kkk' ,McTName[i][2])
    if McTName[i][2].startswith('Area'):
      if section.startswith('1.0') or section.startswith('3.2') or section.startswith('3.3'):
        rowvalue=5# column 5 
      ar=0
      unit='m2'
    if McTName[i][2].startswith('Perimeter'):
      if section.startswith('1.0') or section.startswith('3.2') or section.startswith('3.3'):
        rowvalue=7# column 7
      ar=0   
      unit='m'
    if McTName[i][2].startswith('Length'):
      if section.startswith('1.0') or section.startswith('3.2') or section.startswith('3.3'):
        rowvalue=9# column 7
      ar=0   
      unit='m'
    if McTName[i][2].startswith('Count'):
      if section.startswith('1.0') or section.startswith('3.2') or section.startswith('3.3'):
        rowvalue=3# column 7

      ar=0   
      unit='Nr'
 
    if isinstance(McTName[i][1], list):
      guessednames=worksheetw.get_col(1, returnas='matrix', include_tailing_empty=False)

      for m in McTName[i][1]:
        if m:
          if m.startswith('text1'):
            name=m.removeprefix('text1')

            allgbnames+= name +' + '
            indices = [o for o, x in enumerate(guessednames) if x == name]
            # print(indices)
            for j in range(len(indices)):
              # print('kjjjj',roww[j])
              ar+=float(worksheetw.cell((indices[j]+1 ,rowvalue)).value) 
          else:
              if section.startswith('3.2') or section.startswith('3.3'):
                item+=m
              else: 
                item+=m + ' ,'
              # print(item)
          if section.startswith('3.2') or section.startswith('3.3'):
            n= McTName[i][0] + ' ( '+ allgbnames[:-2]+ ' )  '
          else:
            n= McTName[i][0] + ' ( '+ allgbnames[:-2] +' , ' + item[:-1]  + ' )  '
    else:
      if McTName[i][1].startswith('text1'):
        name=McTName[i][1].removeprefix('text1')
        allgbnames+= name

        roww=worksheetw.find(name)
        for j in range(len(roww)):
          # print('kjjjj',roww[j])
          ar+=float(worksheetw.cell((roww[j].row ,rowvalue)).value) 
        n= McTName[i][0] + ' ( '+ allgbnames + ' )  ' 

    rowi=[str(n),ar,unit]

    allrows.append([rowi[0],unit])
    
    worksheetS.update_row(i+2,rowi)
    # worksheetS.adjust_column_width(start=1,end=4)
    worksheetS.adjust_column_width(start=1,end=1, pixel_size=350)
    worksheetS.adjust_column_width(start=2,end=2, pixel_size=100)
    worksheetS.adjust_column_width(start=3,end=3)
  guessednamesfinal=getguessnames(worksheetw,colorsused) #gb,color
  mctname_guessedNames=label_MC_Tname(allrows) #mctname, gb1
  mappedMct_colors=mctname_colors(guessednamesfinal,mctname_guessedNames) #mctname,gb1
  adjustannotations(pdflink, mappedMct_colors)

  xx=(worksheetS.cell( ( len(McTName) +1 ,3)) ).address.label
  model_cell1 =worksheetS.cell('A2')
  model_cell1.set_horizontal_alignment( pygsheets.custom_types.HorizontalAlignment.LEFT )
  pygsheets.DataRange('A2', str(xx), worksheet=worksheetS).apply_format(model_cell1)


  model_cell =worksheetS.cell('A1')
  model_cell.set_text_format('bold', True)
  model_cell.set_horizontal_alignment( pygsheets.custom_types.HorizontalAlignment.CENTER )
  pygsheets.DataRange('A1','C1', worksheet=worksheetS).apply_format(model_cell)
  
  body2={
  "requests": [
    {
      "updateBorders": {
        "range": {
          "sheetId": str(summaryId),
          "startRowIndex": 0,
          "endRowIndex":  len(McTName) +1 ,
          "startColumnIndex": 0,
          "endColumnIndex": 3
        },
        "top": {
          "style": "SOLID",
          "width": 2,
          "color": {
            "red": 0.0,
            "green":0.0,
            "blue":0.0
          },
        },
        "bottom": {
          "style": "SOLID",
          "width": 2,
          "color": {
            "red": 0.0,
            "green":0.0,
            "blue":0.0
          },
        },
        "left":{
          "style": "SOLID",
          "width":2,
          "color": {
            "red": 0.0,
            "green":0.0,
            "blue":0.0
          },
        },
        "right":{
          "style": "SOLID",
          "width": 2,
          "color": {
            "red": 0.0,
            "green":0.0,
            "blue":0.0
          },
        },
        "innerHorizontal":{
          "style": "SOLID",
          "width":2,
          "color": {
            "red": 0.0,
            "green":0.0,
            "blue":0.0
          },
        },
        "innerVertical": {
          "style": "SOLID",
          "width": 2,
          "color": {
            "red": 0.0,
            "green":0.0,
            "blue":0.0
          },
        },
      }
    }
  ]
  }
  spreadsheet_service.spreadsheets().batchUpdate(spreadsheetId=spreadsheet_key, body=body2).execute()
  return summaryId #,guessednamesfinal   

  # print(x,xarea)

#########################
def label_MC_Tname(mctname_names):
  # print('mctname_names',mctname_names)
  mctname_guessedNames=[]
  # unit=mctname_names[1]
  for row in mctname_names:
    text=row[0]
    unit=row[1]
    # print(text,unit)
    text_before_parenthesis = text.split('(')[0].strip()
    # Step 2: Extract text inside the parentheses
    inside_parentheses = re.search(r'\((.*?)\)', text)
    if inside_parentheses:
        inside_parentheses = inside_parentheses.group(1).strip()
        if '+' in inside_parentheses:
            # Split the content by '+', and strip any leading/trailing spaces
            parts = [part.strip() for part in inside_parentheses.split('+')]
        else:
            # If no '+', just put the whole content as a single part
            parts = [inside_parentheses]
        mctname_guessedNames.append([text_before_parenthesis,parts , unit])
        # print('mctname_guessedNames',mctname_guessedNames)
  return mctname_guessedNames  
  
#########################
def mctname_colors(color_dict, mctname_gb):
    
    # print('color_dict', color_dict)
    # print('mctname_gb', mctname_gb)

    # Convert the color_dict array into a dictionary with trimmed keys for easier lookup
    color_dict_map = {color[0].strip(): color[1] for color in color_dict}
    # print('color_dict_map', color_dict_map)

    # Result list to store the final output
    result = []

    # Processing the items
    for item in mctname_gb:
        name = item[0]
        colors = item[1]
        unit=item[2]
        # print(name,colors)
        # Get RGB values for the corresponding colors
        color_values = []
        for color in colors:
            trimmed_color = color.strip()  # Trim any leading/trailing spaces
            if trimmed_color in color_dict_map:
                color_values.append(color_dict_map[trimmed_color])
            else:
                # Handle missing references (e.g., append None or a default color)
                color_values.append(None)  # Or append a default color like (0, 0, 0)

        # Join the name with the corresponding RGB colors
        result.append([name, color_values,unit])

    # Print the result
    # print('result', result)
    return result



#########################
def getguessnames(worksheetw,colorsused):
  guessednamesfinal=[]
  # print('colorsused',colorsused)
  guessednames=worksheetw.get_col(1, returnas='matrix', include_tailing_empty=False)
  # print(guessednames)
  for i in range(len(guessednames[3:])):
      item = guessednames[3:][i]  # Name from the guessednames list

      colorforitem = colorsused[i]  # Get the corresponding color for the current item      
      if item not in guessednamesfinal:
          guessednamesfinal.append([item, colorforitem]) 
  # print('guessednamesfinal', guessednamesfinal)
  return guessednamesfinal

#########################
def is_color_within_tolerance(color1, color2, tolerance):
    # Ensure both colors are tuples of integers
    color1 = tuple(map(int, color1))
    color2 = tuple(map(int, color2))
    
    return all(abs(c1 - c2) <= tolerance for c1, c2 in zip(color1, color2))

#########################
def deletefromlegend(deletedrows, SimilarAreaDictionarycopy, section, areaPermArr=[]):
    items = []
    # print('deletefromlegend',deletedrows)
    idx = 0
    if section.startswith('1.0') or section.startswith('3.2') or section.startswith('3.3'):
        areaPermArr = ast.literal_eval(areaPermArr)
        print(areaPermArr)
    myDict = eval(SimilarAreaDictionarycopy)
    SimilarAreaDictionarycopy = pd.DataFrame(myDict)
    strings = deletedrows['content']
    print('content',deletedrows)
    colors = deletedrows['color']
    indicies_toDelete=[]
    # print(colors)
    # Define your tolerance value
    tolerance = 2 # Allowable tolerance for RGB differences
    # print(SimilarAreaDictionarycopy)
    color_list = list(SimilarAreaDictionarycopy['Color'])  # Convert Index/Series to list
    if section.startswith('1.0'):      
        
       color_list = [ast.literal_eval(color) for color in color_list]
        
    for j in range(len(colors)):
        # newitem=str(colors[j]).split('\n \n')
        # input_str = " ".join(str(newitem).split())
        color = tuple(colors[j])  # Ensure 'color' is in tuple format
        found = False

        for idx, existing_color in enumerate(color_list):
            existing_color = tuple(existing_color)  # Ensure it's a tuple for comparison

               
            # print('eee',existing_color,color)
            if is_color_within_tolerance(existing_color, color, tolerance):
                print(f'Color {color} found close to {existing_color} at index {idx}')
                found = True
                # print('strings',strings[j])
                # matchA = re.search(r"Area=(\d+\.\d+)", strings[j])
                # matchP = re.search(r"Perimeter=(\d+\.\d+)", strings[j])


                # else:
                #   matchL = re.search(r"(\d+\.\d+)\s*m(?![a-zA-Z])", strings[j])

                comment = SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Comments')]
                occ = SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Occurences')]

                # Only subtract area if the area value is found
                if "Area" in deletedrows['subject'][j]:
                    matchA = re.search(r"(\d+\.\d+)\s*sq\s*m",strings[j])
                    print('matchA')
                    # print(' SimilarAreaDictionaryArea',  float(matchA.group(1)),   SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Area')])
                    SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Area')] =SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Area')] - float(matchA.group(1))
                    # Update area occurrences
                    if pd.notna(comment) and 'Area' in str(comment):
                        matches = re.findall(r'\b\d+\b', str(comment))
                        area_occurrences = int(matches[0]) - 1
                        perimeter_occurrences = int(matches[1])
                    else:
                        area_occurrences = int(SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Occurences')]) - 1
                        perimeter_occurrences = SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Occurences')]

                    SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Comments')] = f'Area occurrences: {area_occurrences}, Perimeter occurrences: {perimeter_occurrences}'
                if "Perimeter" in deletedrows['subject'][j]:
                    matchP = re.search(r"(\d+\.\d+)\s*m(?![a-zA-Z])", strings[j])
                    print('matchP')
                    SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Perimeter')] =SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Perimeter')]  - float(matchP.group(1))# Replace 'Area' with the actual column name
                    if pd.notna(comment) and 'Perimeter' in str(comment):
                      matches = re.findall(r'\b\d+\b', str(comment))
                      area_occurrences = int(matches[0])
                      perimeter_occurrences = int(matches[1]) -1
                    else:       

                      area_occurrences = int(SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Occurences')]) 
                      perimeter_occurrences = SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Occurences')]-1
                      # print(area_occurrences,perimeter_occurrences)
                      SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Comments')] = f'Area occurrences: {area_occurrences}, Perimeter occurrences: {perimeter_occurrences}'
                else:
                      matchL = re.search(r"(\d+\.\d+)\s*m(?![a-zA-Z])", strings[j])  



                # Handle occurrences and row deletion
                if area_occurrences==0 and perimeter_occurrences==0:
                    # if area_occurrences ==0 and perimeter_occurrences==0:
                      if str(idx) not in indicies_toDelete:
                        indicies_toDelete.append(str(idx))
                    # print(SimilarAreaDictionarycopy.index[idx],idx)
                    

                else:
                    SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Comments')] = f'Area occurrences: {area_occurrences}, Perimeter occurrences: {perimeter_occurrences}'
                    if area_occurrences > perimeter_occurrences:
                        SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Occurences')] = area_occurrences
                    elif perimeter_occurrences > area_occurrences:
                        SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Occurences')] = perimeter_occurrences
                    else:
                        SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Occurences')] = area_occurrences
                    if area_occurrences==1:
                       SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Area')]= SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Area')] 
                    if perimeter_occurrences==1:
                       SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Perimeter')]= SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Perimeter')] 
                    if area_occurrences==perimeter_occurrences:
                       if section.startswith('1.0'):
                          SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Length')] =SimilarAreaDictionarycopy.iloc[int(idx), SimilarAreaDictionarycopy.columns.get_loc('Total Length')]  - float(matchL.group(1))# Replace 'Area' with the actual column name
                break

        if not found:
            print(f'Color {color} not found within tolerance')
    # print('indicies_toDelete',indicies_toDelete)

    SimilarAreaDictionarycopy.drop(index=indicies_toDelete, axis=0, inplace=True)
    # print('SimilarAreaDictionarycopy',SimilarAreaDictionarycopy)
    return SimilarAreaDictionarycopy

#########################
def DoorsLegend(Dictionary,spreadsheetId):
    print('id',spreadsheetId)
    global worksheetw
    worksheetw.clear()
    top_header_format = [
            {'mergeCells': {  # legend and data created
                'mergeType': 'MERGE_ROWS',
                'range': {
                    'sheetId': '0',
                    'startRowIndex': 0,
                    'endRowIndex': 1,
                    'startColumnIndex': 0,
                    'endColumnIndex': 2
                }
            }}
    ]
    print('here')
    # spreadsheet_service.spreadsheets().batchUpdate(spreadsheetId=spreadsheetId, body={'requests': top_header_format}).execute()
    worksheetw.cell((1, 1)).value = 'Legend and Data Created'
    worksheetw.set_dataframe(start='A2', df=Dictionary)
    print('here1')
    body2={ 
      "requests": [
        {
          "updateBorders": {
            "range": {
              "sheetId": str(0),
              "startRowIndex": 0,
              "endRowIndex":  4,
              "startColumnIndex": 0,
              "endColumnIndex": 2
            },
            "top": {
              "style": "SOLID",
              "width": 2,
              "color": {
                "red": 0.0,
                "green":0.0,
                "blue":0.0
              },
            },
            "bottom": {
              "style": "SOLID",
              "width": 2,
              "color": {
                "red": 0.0,
                "green":0.0,
                "blue":0.0
              },
            },
            "left":{
              "style": "SOLID",
              "width":2,
              "color": {
                "red": 0.0,
                "green":0.0,
                "blue":0.0
              },
            },
            "right":{
              "style": "SOLID",
              "width": 2,
              "color": {
                "red": 0.0,
                "green":0.0,
                "blue":0.0
              },
            },
            "innerHorizontal":{
              "style": "SOLID",
              "width":2,
              "color": {
                "red": 0.0,
                "green":0.0,
                "blue":0.0
              },
            },
            "innerVertical": {
              "style": "SOLID",
              "width": 2,
              "color": {
                "red": 0.0,
                "green":0.0,
                "blue":0.0
              },
            },
          }
        }
      ]
    }
    print('here2')
    # spreadsheet_service.spreadsheets().batchUpdate(spreadsheetId=spreadsheetId, body=body2).execute()
    model_cell = worksheetw.cell('A1')
    model_cell.set_text_format('bold', True)
    model_cell.set_horizontal_alignment(pygsheets.custom_types.HorizontalAlignment.CENTER)
    model_cell.color = (213 / 255, 219 / 255, 255 / 255)
    pygsheets.DataRange('A2', 'B2', worksheet=worksheetw).apply_format(model_cell)
    print('here3')
    return Dictionary
#########################

def deletedoors(deletedrows,dictionary):

  items=[]
  # print('dletefromlegend2.8')
  idx=0
  myDict=eval(dictionary)
  df_doors=pd.DataFrame(myDict)
  strings=deletedrows['content']
  print('strings',strings)
  for item in strings:
    newitem=str(item).split('\n \n')
    if 'Single' in str(newitem):
      current_value = df_doors.loc[df_doors['Type'] == 'Single Doors', 'Quantity'].values[0]
      new_value = int(current_value) - 1
      df_doors.loc[df_doors['Type'] == 'Single Doors', 'Quantity'] = str(new_value)
    elif 'Double' in str(newitem):
      current_value = df_doors.loc[df_doors['Type'] == 'Double Doors', 'Quantity'].values[0]
      new_value = int(current_value) - 1
      df_doors.loc[df_doors['Type'] == 'Double Doors', 'Quantity'] = str(new_value)
  df_doors = df_doors[['Type', 'Quantity']]
  print(df_doors)
  return df_doors

#########################

def deletemarkups(list1, dbPath, path):
    '''list1 : original markup pdf
       list2 : deleted markup pdf
       deletedrows : deleted markups - difference between both dfs
    '''

    myDict1 = eval(list1)
    list1 = pd.DataFrame(myDict1)

    dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
    
    md, res = dbxTeam.files_download(path=dbPath + path)
    data = res.content
    doc = fitz.open("pdf", data)
    
    # Prepare a DataFrame for the annotations in the new PDF
    list2 = pd.DataFrame(columns=['content', 'id', 'subject', 'color'])

    for page in doc:
        # Iterate through annotations on the page
        for annot in page.annots():
            # Get the color of the annotation
            annot_color = annot.colors
            if annot_color is not None:
                # Check for fill or stroke color
                stroke_color = annot_color.get('stroke')
                fill_color = annot_color.get('fill')

                v = 'stroke' if stroke_color else 'fill'
                color = annot_color.get(v)
                if color:
                    # Convert color to tuple and multiply by 255 to get RGB values
                    color_tuple = (int(color[0] * 255), int(color[1] * 255), int(color[2] * 255))
                    # Append annotation data to list2
                    list2.loc[len(list2)] = [annot.info['content'], annot.info['id'], annot.info['subject'], color_tuple]

    # Ensure that colors are stored as tuples (which are hashable)
    list1['color'] = list1['color'].apply(lambda x: tuple(x) if isinstance(x, list) else x)
    
    # Find the deleted rows by checking the difference between original and current annotations
    deletedrows = pd.concat([list1, list2]).drop_duplicates(keep=False)

    # print(deletedrows, len(deletedrows))
    flag = 0
    if len(deletedrows) != 0:
        flag = 1
        deletedrows = deletedrows[['content', 'id', 'subject', 'color']]
        # Drop rows where 'content' starts with 'Scale'
        deletedrows = deletedrows.drop(deletedrows.index[deletedrows['content'].str.startswith('Scale')])
    else:
        flag = 0
    
    return deletedrows

############################


def adjustannotations(pdflink, color_items):
    # Load the input PDF
  pdf_content = None
  if pdflink and ('http' in pdflink or 'dropbox' in pdflink):
    # Modify Dropbox link for direct download
    # print('pdffff',pdflink)
    if 'dl=0' in pdflink:
      pdflink = pdflink.replace('dl=0', 'dl=1')

      # Download the PDF content from the shareable link
      response = requests.get(pdflink)
      pdf_bytes_io = BytesIO(response.content)  # Store the content in memory
      # pdf_bytes_io = BytesIO(OutputPdfStage1)
      reader = PdfReader(pdf_bytes_io)
      writer = PdfWriter()

      # Append all pages to the writer
      writer.append_pages_from_reader(reader)

      # Add metadata (optional)
      metadata = reader.metadata
      writer.add_metadata(metadata)

      # Convert color_items array to a more accessible structure (a dictionary of colors)
      color_dict = {}
      for item in color_items:
          label = item[0]
          colors = item[1]
          unit = item[2]
          # print('UNIT', unit)
          for color in colors:
              color_dict[tuple(color)] = (label, unit)  # Map RGB color to a tuple (label, unit)

      # print(color_dict)

      # Iterate over pages to check annotations and update label
      for page_index, page in enumerate(writer.pages):
          if "/Annots" in page:
              annotations = page["/Annots"]
              for annot_index, annot in enumerate(annotations):
                  obj = annot.get_object()

                  # Only proceed if the annotation has a color and is a valid markup (e.g., Polygon)
                  if "/C" in obj:
                      color = tuple(obj["/C"])  # Extract the RGB color (list of 3 values)
                      color = (int(round(color[0] * 255, 1)), int(round(color[1] * 255, 1)), int(round(color[2] * 255, 1)))

                      # If the color matches any item in color_dict, update the label
                      if color in color_dict:
                          label, unit = color_dict[color]  # Retrieve both label and unit
                          # print(unit)
                          # Update annotation label (e.g., /Contents or /Subj)
                          if "/Contents" in obj:
                              if unit == 'm2' and "sq m" in obj["/Contents"]:
                                  obj.update({
                                      NameObject("/Label"): TextStringObject(f"{label}")
                                  })
                              elif unit == 'm' and "m" in obj["/Contents"] and "sq" not in obj["/Contents"]:  # Ensure it's not an area
                                  obj.update({
                                      NameObject("/Label"): TextStringObject(f"{label}")
                                  })
                          # print(f"Updated Annotation: {obj}")

     # Save the modified PDF
      output_pdf_io = BytesIO()
      writer.write(output_pdf_io)
      output_pdf_io.seek(0)

      # Save the content to a file (avoid using the same name for the variable and file object)
      OutputPdfStage2=output_pdf_io.read()
      doc2 =fitz.open('pdf',OutputPdfStage2)
      dbPath='/TSA JOBS/ADR Test'+pdfpath0+'Measured Plan/'+pdftitle
      # dburl=tsadropboxretrieval.uploadanyFile(doc=doc2,pdfname=pdftitle ,path=dbPath)
      # print('omarpdf',dburl)
      print("Annotations updated and saved")
      dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
      try:
          # Upload the new file and overwrite the existing one
          dbxTeam.files_upload(doc2.write() ,dbPath, mode=dropbox.files.WriteMode('overwrite'))
              # dbxTeam.files_upload(f.read(), dropbox_file_path, mode=dropbox.files.WriteMode('overwrite'))
          print(f"File replaced successfully at")
      except dropbox.exceptions.ApiError as e:
          print(f"Error replacing the file: {e}")
      # return dburl

def getallguessednames():
    guessednamesfinal = []
    # Retrieve column data from the worksheet
    global worksheetw
    guessednames = worksheetw.get_col(1, returnas='matrix', include_tailing_empty=False)
    if len(guessednames[3:])>0:
        # Skip the first three entries and filter out empty/whitespace strings
        for item in guessednames[3:]:
            if item.strip() and item not in guessednamesfinal:  # Remove surrounding whitespace and check for uniqueness
                guessednamesfinal.append(item)
        print('guessednamesfinal:', guessednamesfinal)
        return guessednamesfinal
    else:
       return 'None'