LatestDuplicate_Working

Paused

App Files Files Community

Marthee committed on Sep 28, 2025

Commit

b36adba

verified ·

1 Parent(s): 4e64944

Update Doors_Schedule.py

Browse files

Files changed (1) hide show

Doors_Schedule.py +246 -530

Doors_Schedule.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from collections import defaultdict
 import pandas as pd
 import random
 import math
@@ -116,7 +117,6 @@ def generate_current_table_without_cropping(clm_idx, clmn_name, df):
   return selected_df
 def crop_rename_table(indices, clmn_name, clmn_idx,df):
   #crop_at = (max(set(indices), key=indices.count)) + 1
   crop_at =  max(indices) + 1
@@ -292,8 +292,10 @@ def get_column_name(user_input_m):
   # fixed column names
   fixed_list = ["door_id", "door_type", "width", "height"]
   for i in range(len(empty_indices)):
-    if empty_indices[i] == 3:
-      fixed_list[2] = "structural_opening"
     fixed_list[empty_indices[i]] = ""
   #finalize the column name structure
@@ -455,8 +457,18 @@ def find_text_in_plan(label, x):
   return substring_coordinates, words, point_list
-def get_selected_columns_by_index(df, column_index_list):
   selected_df = df.iloc[:, column_index_list]
   return selected_df
 ## Get the column indices from extract_tables(schedule)
@@ -478,9 +490,10 @@ def get_column_indices_from_dfs_normal(dfs, user_patterns):
     cell_columns_appearance = flexible_search(dfs[i], non_empty_info)
     cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
-    if len(cell_matches) == 0 and len(col_matches) == 0:
       continue
     else:
       #IN COLUMNS
       if len(col_matches) == len(non_empty_info):
@@ -498,6 +511,73 @@ def get_column_indices_from_dfs_normal(dfs, user_patterns):
   return column_index_list
 # get the index of dataframe that has the maximum column matches in the dfs from model table detection
 def get_df_index(dfs, user_patterns):
   df_matches = []
@@ -615,6 +695,9 @@ def get_cleaned_data(locations):
 # If the value is fractional (e.g. 0.5) it is written as-is; otherwise the decimal point is removed
 def get_width_info_tobeprinted(new_data):
   width_info_tobeprinted = []
   if len(new_data[0]) == 4:
     for _,_,_, w in new_data:
       #w = re.sub(r",", "", w)
@@ -626,7 +709,8 @@ def get_width_info_tobeprinted(new_data):
       h = re.sub(r",", "", h)
       #if w == "N/A":
-      if w.isalpha():
         w = w
       else:
         if float(w).is_integer():
@@ -634,7 +718,8 @@ def get_width_info_tobeprinted(new_data):
         else:
           w = w
       #if h == "N/A":
-      if h.isalpha():
         h = h
       else:
         if float(h).is_integer():
@@ -675,70 +760,14 @@ def get_widths_bb_format(cleaned_width, kelma):
   return widths
-'''def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
-  width_info_tobeprinted = []
-  secondary_info_tobeprinted = []
-  if len(main_info) == 2 and len(secondary_info) == 1:
-   for coords, label, acous, color in new_data:
-      secondary_info_tobeprinted.append(acous)
-  if len(main_info) == 2 and len(secondary_info) == 2:
-    for coords, label, acous, fire, color in new_data:
-       secondary_info_tobeprinted.append((acous, fire))
-  if len(main_info) == 3 and len(secondary_info) == 1:
-    for coords, label, width, acous, color in new_data:
-      width_info_tobeprinted.append(width)
-      secondary_info_tobeprinted.append(acous)
-  if len(main_info) == 3 and len(secondary_info) == 2:
-    for coords, label, width, acous, fire, color in new_data:
-      width_info_tobeprinted.append(width)
-      secondary_info_tobeprinted.append((acous, fire))
-  if len(main_info) == 4 and len(secondary_info) == 1:
-    for coords, label, width, height, acous, color in new_data:
-      w = re.sub(r",", "", width)
-      h = re.sub(r",", "", height)
-      if float(w).is_integer():
-        w = int(float(w))
-      else:
-        w = w
-      if float(h).is_integer():
-        h = int(float(h))
-      else:
-        h = h
-      width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
-      secondary_info_tobeprinted.append(acous)
-  if len(main_info) == 4 and len(secondary_info) == 2:
-    for coords, label, width, height, acous, fire, color in new_data:
-      print(type(width))
-      print(type(height))
-      w = re.sub(r",", "", width)
-      h = re.sub(r",", "", height)
-      if w == "N/A":
-        w = w
-      else:
-        if float(w).is_integer():
-          w = int(float(w))
-        else:
-          w = w
-      if h == "N/A":
-        h = h
-      else:
-        if float(h).is_integer():
-          h = int(float(h))
-        else:
-          h = h
-      width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
-      secondary_info_tobeprinted.append((acous, fire))
-  return width_info_tobeprinted, secondary_info_tobeprinted
-'''
 def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
   width_info_tobeprinted = []
@@ -747,11 +776,13 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
   if len(main_info) == 2 and len(secondary_info) == 1:
    for coords, label, acous, color in new_data:
       secondary_info_tobeprinted.append(acous)
   if len(main_info) == 2 and len(secondary_info) == 2:
     for coords, label, acous, fire, color in new_data:
        secondary_info_tobeprinted.append((acous, fire))
   if len(main_info) == 3 and len(secondary_info) == 1:
     for coords, label, width, acous, color in new_data:
@@ -768,7 +799,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
     for coords, label, width, height, acous, color in new_data:
       w = re.sub(r",", "", width)
       h = re.sub(r",", "", height)
-      if w.isalpha():
         w = w
       else:
         if float(w).is_integer():
@@ -776,7 +808,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
         else:
           w = w
       #if h == "N/A":
-      if h.isalpha():
         h = h
       else:
         if float(h).is_integer():
@@ -784,7 +817,7 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
         else:
           h = h
       width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
-      secondary_info_tobeprinted.append((acous, fire))
   if len(main_info) == 4 and len(secondary_info) == 2:
     for coords, label, width, height, acous, fire, color in new_data:
@@ -793,7 +826,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
       w = re.sub(r",", "", width)
       h = re.sub(r",", "", height)
       #if w == "N/A":
-      if w.isalpha():
         w = w
       else:
         if float(w).is_integer():
@@ -801,7 +835,8 @@ def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
         else:
           w = w
       #if h == "N/A":
-      if h.isalpha():
         h = h
       else:
         if float(h).is_integer():
@@ -916,6 +951,13 @@ def get_flattened_tuples_list_all(col_dict):
     return flattened_list
 #SECONDARY
 def get_cleaned_data_secondary(locations, main_info, secondary_info):
@@ -985,31 +1027,6 @@ def get_cleaned_data_secondary(locations, main_info, secondary_info):
   return new_data
-def get_secondary_tobeprinted_clean(selected_secondary_info, secondary_tobeprinted, secondary_info):
-  secondary_printed_clean = []
-  if len(secondary_info) == 1:
-    if any('acoustic' in col for col in selected_secondary_info.columns):
-      for acous in secondary_tobeprinted:
-        new_text = f"acoustic rating: {acous};"
-        secondary_printed_clean.append(new_text)
-    if any('fire' in col for col in selected_secondary_info.columns):
-      for fire in secondary_tobeprinted:
-        new_text = f"fire rating: {fire};"
-        secondary_printed_clean.append(new_text)
-  if len(secondary_info) == 2:
-    for fire, acous in secondary_tobeprinted:
-      new_text = f"fire rating: {fire}; acoustic rating: {acous};"
-      secondary_printed_clean.append(new_text)
-      print(new_text)
-  return secondary_printed_clean
-def mix_width_secondary(widths, secondary_printed_clean):
-  all_print = []
-  for i in range(len(widths)):
-    newest_text = f"{widths[i]}; {secondary_printed_clean[i]}"
-    all_print.append(newest_text)
-  return all_print
 def merge_pdf_bytes_list(pdfs):
     writer = PdfWriter()
@@ -1026,331 +1043,6 @@ def merge_pdf_bytes_list(pdfs):
     return output_stream.read()
-'''def add_bluebeam_count_annotations_secondary(pdf_bytes, locations, main_info, secondary_info):
-    pdf_stream = io.BytesIO(pdf_bytes)  # Load PDF from bytes
-    pdf_document = fitz.open("pdf", pdf_stream.read())  # Open PDF in memory
-    page = pdf_document[0]  # First page
-    if len(main_info) == 2 and len(secondary_info) == 1:
-        for loc in locations:
-            coor, lbl, acous, clr = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-    if len(main_info) == 2 and len(secondary_info) == 2:
-        for loc in locations:
-            coor, lbl, acous, fire, clr = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-    if len(main_info) == 3 and len(secondary_info) == 1:
-        for loc in locations:
-            if len(loc) != 5:
-                continue
-            coor, lbl, w, acous, clr = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-    if len(main_info) == 3 and len(secondary_info) == 2:
-        for loc in locations:
-            coor, lbl, w, acous, fire, clr = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-    if len(main_info) == 4 and len(secondary_info) == 1:
-        for loc in locations:
-            coor, lbl, w, h, acous, clr = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-    if len(main_info) == 4 and len(secondary_info) == 2:
-        for loc in locations:
-            coor, lbl, w, h, acous, fire, clr = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-    #Save modified PDF to a variable instead of a file
-    output_stream = io.BytesIO()
-    pdf_document.save(output_stream)
-    pdf_document.close()
-    return output_stream.getvalue()  # Return the modified PDF as bytes
-def modify_author_in_pypdf2(pdf_bytes, new_authors):
-    pdf_stream = io.BytesIO(pdf_bytes)  # Load PDF from bytes
-    reader = PyPDF2.PdfReader(pdf_stream)
-    writer = PyPDF2.PdfWriter()
-    author_index = 0  # Track author assignment
-    for page in reader.pages:
-        if "/Annots" in page:  #Check if annotations exist
-            for annot in page["/Annots"]:
-                annot_obj = annot.get_object()
-                # Assign each annotation a unique author
-                if len(new_authors) == 0:
-                    break
-                if author_index < len(new_authors):
-                    annot_obj.update({"/T": TextStringObject(new_authors[author_index])})#Convert to PdfString
-                    author_index += 1  # Move to next author
-                # If authors list is exhausted, keep the last one
-                else:
-                    annot_obj.update({"/T": TextStringObject(new_authors[-1])})
-        writer.add_page(page)
-    #Save the modified PDF to a variable
-    output_stream = io.BytesIO()
-    writer.write(output_stream)
-    output_stream.seek(0)
-    return output_stream.read()
-def add_bluebeam_count_annotations(pdf_bytes, locations):
-    pdf_stream = io.BytesIO(pdf_bytes)  # Load PDF from bytes
-    pdf_document = fitz.open("pdf", pdf_stream.read())  # Open PDF in memory
-    page = pdf_document[0]  # First page
-    print(f"length of locations 0 from not sec presence: {len(locations[0])}")
-    for loc in locations:
-      if len(loc) == 3:
-            coor, lbl, clr = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-      if len(loc) == 4:
-            coor, lbl, clr,w = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-      if len(loc) == 5:
-            coor, lbl, clr,w,h = loc
-            clr = (clr[0] / 255, clr[1] / 255, clr[2] / 255)
-            for cor in coor:
-                #Create a Circle annotation (Count Markup)
-                annot = page.add_circle_annot(
-                    fitz.Rect(cor[0] - 10, cor[1] - 10, cor[0] + 10, cor[1] + 10)  # Small circle
-                )
-                #Assign required Bluebeam metadata
-                annot.set_colors(stroke=clr, fill=(1, 1, 1))  # Set stroke color and fill white
-                annot.set_border(width=2)  # Border thickness
-                annot.set_opacity(1)  # Fully visible
-                #Set annotation properties for Bluebeam Count detection
-                annot.set_info("name", lbl)  # Unique name for each count
-                annot.set_info("subject", "Count")  #Bluebeam uses "Count" for Count markups
-                annot.set_info("title", lbl)  # Optional
-                annot.update()  # Apply changes
-    #Save modified PDF to a variable instead of a file
-    output_stream = io.BytesIO()
-    pdf_document.save(output_stream)
-    pdf_document.close()
-    return output_stream.getvalue()  # Return the modified PDF as bytes
-def modify_author_in_pypdf2(pdf_bytes, new_authors):
-    pdf_stream = io.BytesIO(pdf_bytes)  # Load PDF from bytes
-    reader = PyPDF2.PdfReader(pdf_stream)
-    writer = PyPDF2.PdfWriter()
-    author_index = 0  # Track author assignment
-    for page in reader.pages:
-        if "/Annots" in page:  #Check if annotations exist
-            for annot in page["/Annots"]:
-                annot_obj = annot.get_object()
-                # Assign each annotation a unique author
-                if len(new_authors) == 0:
-                    break
-                if author_index < len(new_authors):
-                    annot_obj.update({"/T": TextStringObject(new_authors[author_index])})#Convert to PdfString
-                    author_index += 1  # Move to next author
-                # If authors list is exhausted, keep the last one
-                else:
-                    annot_obj.update({"/T": TextStringObject(new_authors[-1])})
-        writer.add_page(page)
-    #Save the modified PDF to a variable
-    output_stream = io.BytesIO()
-    writer.write(output_stream)
-    output_stream.seek(0)
-    return output_stream.read()
-def merge_pdf_bytes_list(pdfs):
-    writer = PdfWriter()
-    for pdf_bytes in pdfs:
-        pdf_stream = io.BytesIO(pdf_bytes)
-        reader = PdfReader(pdf_stream)
-        for page in reader.pages:
-            writer.add_page(page)
-    output_stream = io.BytesIO()
-    writer.write(output_stream)
-    output_stream.seek(0)
-    return output_stream.read()
-def process_pdf_secondary(input_pdf_path, output_pdf_path, locations, new_authors, main_info, secondary_info):
-    if isinstance(input_pdf_path, bytes):
-      original_pdf_bytes = input_pdf_path
-    else:
-        with open(input_pdf_path, "rb") as file:
-            original_pdf_bytes = file.read()
-    #Add Bluebeam-compatible count annotations
-    annotated_pdf_bytes = add_bluebeam_count_annotations_secondary(original_pdf_bytes, locations, main_info, secondary_info)
-    #Modify author field using PyPDF2
-    final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
-    return final_pdf_bytes
-def process_pdf(input_pdf_path, output_pdf_path, locations, new_authors):
-    #Load original PDF
-    if isinstance(input_pdf_path, bytes):
-        original_pdf_bytes = input_pdf_path
-    else:
-        with open(input_pdf_path, "rb") as file:
-            original_pdf_bytes = file.read()
-    #Add Bluebeam-compatible count annotations
-    annotated_pdf_bytes = add_bluebeam_count_annotations(original_pdf_bytes, locations)
-    #Modify author field using PyPDF2
-    final_pdf_bytes = modify_author_in_pypdf2(annotated_pdf_bytes, new_authors)
-    return final_pdf_bytes
-'''
 def calculate_bounding_rect_count(vertices,padding):
     x, y = vertices[0]
@@ -1365,13 +1057,6 @@ def rgb_string_to_hex(rgb_string):
     return '#{:02X}{:02X}{:02X}'.format(int(r * 255), int(g * 255), int(b * 255))
 def generate_annotation_xml_block_count(vertices, area_text, author, custom_data: dict, column_order: list, index: int,
                                    label: str = '',height:str='',width:str='',
                                    color:str='',countstyle:str='',countsize:str=''):
@@ -1528,6 +1213,17 @@ def mirrored_points(x, y, height_plan):
   mirrored = []
   mirrored.append([x, height_plan - y])
   return mirrored
 # Modified to adjust mirrored points
 def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted, CountStyles, input_user_clmn_names, page_number, height_plan):
@@ -1537,7 +1233,8 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
     R = str(float(r/255))
     G = str(float(g/255))
     B = str(float(b/255))
-    vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
     if input_user_clmn_names[4] and input_user_clmn_names[5]:
         bax_annotations.append({
           'vertices': vertix,
@@ -1559,7 +1256,7 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
               'vertices': vertix,
               'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
               'author': 'ADR',
-              'custom_data': {'FireRating': secondary_tobeprinted[i][0], 'AcousticRating': secondary_tobeprinted[i][1], 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
               'label': new_data[i][1], #change label to whatever u want
               'Height': heightat[i],  #for tameem to change - i added any values'
               'Width':widthat[i],
@@ -1573,7 +1270,7 @@ def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted,
               'vertices': vertix,
               'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
               'author': 'ADR',
-              'custom_data': {'FireRating': secondary_tobeprinted[i][0], 'AcousticRating': secondary_tobeprinted[i][1], 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
               'label': new_data[i][1], #change label to whatever u want
               'Height': heightat[i],  #for tameem to change - i added any values'
               'Width':widthat[i],
@@ -1596,7 +1293,8 @@ def create_bb_bax(new_data, widthat, heightat, CountStyles, page_number, height_
     G = str(float(g/255))
     B = str(float(b/255))
-    vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
     bax_annotations.append({
           'vertices': vertix,
           'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
@@ -1613,6 +1311,7 @@ def create_bb_bax(new_data, widthat, heightat, CountStyles, page_number, height_
   return bax_annotations
 #Handle missing widths or heights in some rows
 def generate_separate_dimensions(widths):
   widthat = []
@@ -1648,14 +1347,6 @@ def generate_bluebeam_columns_raw(column_names):
     return tostring(root, encoding="unicode", method="xml")
-# Example usage
-# column_names = ["mycustomcolumn"]
-#column_xml = generate_bluebeam_columns_raw(column_order)
-#with open("count_type_Windows.xml", "w", encoding="utf-8") as f:
-#    f.write(column_xml)
-#print(column_xml)
 def mainRun(schedule, plan, searcharray):
     print("mainRun is RUNNING")
@@ -1668,11 +1359,12 @@ def mainRun(schedule, plan, searcharray):
     p1_type = type(plan[0])
     print(f"el mawgood fe p[0]: {p1_type}")
-    print(f"search array: {searcharray}")
     #dfs = extract_tables(schedule)
     print(f"type of schedule: {type(schedule)}")
-    dfs = extract_tables_model(schedule)
     pdf_widths = []
     pdf_heights = []
     pdfs_count_type = []
@@ -1681,6 +1373,9 @@ def mainRun(schedule, plan, searcharray):
     page_number = 0
     bax_annotations_all_inputs = [] #for the same plan
     #pdfs = []
     for p in plan:
         annotation_counter +=1
         page_number +=1
@@ -1691,82 +1386,98 @@ def mainRun(schedule, plan, searcharray):
         width_plan = page.cropbox.width   # or: width = rect.x1 - rect.x0
         height_plan = page.cropbox.height # or: height = rect.y1 - rect.y0
-        width_plan = math.ceil(width_plan)
-        height_plan = math.ceil(height_plan)
-        for j in range(len(searcharray)):
-          user_input = searcharray[j]
-          secondary_presence = False
-          if user_input[4] or user_input[5]:
-            secondary_presence = True
-            main_info_, secondary_info_ = separate_main_secondary(user_input)
-            main_info = [item for item in main_info_ if item]
-            secondary_info = [item for item in secondary_info_ if item]
-            print("feh secondary information")
-            if user_input[4]:
-              print("Fire rate mawgooda")
-            if user_input[5]:
-              print("Acoustic Rate mawgooda")
-          else:
-            print("mafeesh secondary information")
-          selected_columns_combined = get_selected_columns_all(dfs, user_input)
-          if selected_columns_combined is None:
-              dfs_normal = extract_tables(schedule)
-              column_indices = get_column_indices_from_dfs_normal(dfs_normal, user_input)
-              if len(dfs) == 1:
-                selected_columns_combined = get_selected_columns_by_index(dfs[0], column_indices)
-              if len(dfs) > 1:
-                index_df = get_df_index(dfs, input_user_clmn_names)
-                selected_columns_combined = get_selected_columns_by_index(dfs[index_df], column_indices)
-          selected_columns_combined = selected_columns_combined.applymap(lambda x: 'N/A' if isinstance(x, str) and x.strip() == '' else x)
-          selected_columns_combined = selected_columns_combined.fillna('N/A')
-          kelma = get_st_op_pattern(selected_columns_combined, user_input)
-          col_dict = get_similar_colors_all(selected_columns_combined)
-          flattened_list = get_flattened_tuples_list_all(col_dict)
-          plan_texts = read_text(p)
-          if secondary_presence:
-                  plan_texts = read_text(p)
-                  locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
-                  new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
-                  repeated_labels = get_repeated_labels(locations)
-                  if kelma == None:
-                      widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
-                  else:
-                      width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
-                      cleaned_width = get_cleaned_width(width_info_tobeprinted)
-                      widths = get_widths_bb_format(cleaned_width, kelma)
-                  secondary_printed_clean =  get_secondary_tobeprinted_clean(selected_columns_combined, secondary_tobeprinted, secondary_info)
-                  all_print =  mix_width_secondary(widths, secondary_printed_clean)
-                  #Count type annotation
-                  widht_count, height_count = generate_separate_dimensions(widths)
-                  bax = create_bb_bax_secondary(new_data3, widht_count, height_count, secondary_tobeprinted, CountStyles, user_input, page_number, height_plan)
-                  bax_annotations_all_inputs.append(bax)
-          else:
-              locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
-              new_data = get_cleaned_data(locations)
-              if len(new_data) == 0:
-                  continue
-              repeated_labels = get_repeated_labels(locations)
-              if kelma == None:
-                  widths = get_width_info_tobeprinted(new_data)
-              else:
-                  width_info_tobeprinted = get_width_info_tobeprinted(new_data)
-                  cleaned_width = get_cleaned_width(width_info_tobeprinted)
-                  widths = get_widths_bb_format(cleaned_width, kelma)
-              #count type annotation
-              widht_count, height_count = generate_separate_dimensions(widths)
-              bax = create_bb_bax(new_data, widht_count, height_count, CountStyles, page_number, height_plan)
-              bax_annotations_all_inputs.append(bax)
         # if it is not byte type
         #pdfs_count_type.append(convert_to_bytes(p))
         pdfs_count_type.append(p)
@@ -1781,14 +1492,21 @@ def mainRun(schedule, plan, searcharray):
     column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
     pretty_xml = save_multiple_annotations_count_bax(bax_annotation, 'count_type_Windows.bax', column_order,pdf_widths,pdf_heights,page_number)
     column_xml = generate_bluebeam_columns_raw(column_order)
     ##### SHOULD return pretty_xml, column_xml, merged_pdf
-    not_found = []
     annotatedimgs=[]
     doc2 =fitz.open('pdf',merged_pdf)
     len_doc2 = len(doc2)
@@ -1816,6 +1534,4 @@ def mainRun(schedule, plan, searcharray):
                 v='stroke'
               x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
               list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
-    return annotatedimgs, doc2 , list1, repeated_labels , not_found, pretty_xml, column_xml
-#  return annotatedimg, doc2 , list1, repeated_labels , not_found

 from collections import defaultdict
+from collections import Counter
 import pandas as pd
 import random
 import math
   return selected_df
 def crop_rename_table(indices, clmn_name, clmn_idx,df):
   #crop_at = (max(set(indices), key=indices.count)) + 1
   crop_at =  max(indices) + 1
   # fixed column names
   fixed_list = ["door_id", "door_type", "width", "height"]
   for i in range(len(empty_indices)):
+    if empty_indices[i] == 3 and empty_indices[i - 1] == 2:
+      fixed_list[2] = ""
+    if empty_indices[i] == 3 and not empty_indices[i - 1] == 2:
+     fixed_list[2] = "structural_opening"
     fixed_list[empty_indices[i]] = ""
   #finalize the column name structure
   return substring_coordinates, words, point_list
+def get_selected_columns_by_index(df, column_index_list, user_patterns):
+  """
+  Pick columns of ``df`` by position and rename them from ``user_patterns``.
+
+  df: dataframe to slice (indexed positionally with ``.iloc``).
+  column_index_list: positional column indices to keep.
+  user_patterns: user input, split by ``separate_main_secondary`` into a
+    main part and a secondary part.
+
+  Returns the sliced dataframe whose columns are the names from
+  ``get_column_name`` (main part) followed by the names from
+  ``get_column_name_secondary`` (secondary part).
+  """
+  picked = df.iloc[:, column_index_list]
+  # Rename the columns so they follow the structure used by the clr_dictionary
+  main_part, secondary_part = separate_main_secondary(user_patterns)
+  clmn_name = get_column_name(main_part) + get_column_name_secondary(secondary_part)
+  print(f"clmn_name from the function el 3amla moshkela: {clmn_name}")
+  picked.columns = clmn_name
+  return picked
 ## Get the column indices from extract_tables(schedule)
     cell_columns_appearance = flexible_search(dfs[i], non_empty_info)
     cell_matches, col_matches = analyse_cell_columns(cell_columns_appearance)
+    if len(cell_matches) == 0 and len(col_matches) == 0 and i < len(dfs) - 1:
       continue
+    elif len(cell_matches) == 0 and len(col_matches) == 0:
+       column_index_list = None
     else:
       #IN COLUMNS
       if len(col_matches) == len(non_empty_info):
   return column_index_list
+def find_missing_columns(complete_list, non_complete_list):
+  """
+  Return the entries of ``complete_list`` with no match in ``non_complete_list``.
+
+  Two entries match when they are equal after normalisation: all whitespace
+  and all round/square/curly brackets removed, then lower-cased. Anything
+  that is not a string normalises to the empty string.
+
+  complete_list: the values that are expected to be present.
+  non_complete_list: the values that were actually found.
+
+  Returns a new list, in the order of ``complete_list``, holding the
+  original (un-normalised) entries that were not found. Falsy entries
+  (e.g. empty strings left for unused input fields) are never reported.
+  """
+  def normalize_text(text):
+    """
+    Normalize text by removing all whitespace, brackets, and converting to lowercase.
+    """
+    if not isinstance(text, str):
+        return ""
+    # Remove all whitespace characters (spaces, tabs, newlines)
+    text = re.sub(r'\s+', '', text)
+    # Remove brackets of any type
+    text = re.sub(r'[\(\)\[\]\{\}]', '', text)
+    return text.lower()
+  # Normalise the found values once; a set makes each membership test O(1)
+  normalized_found = {normalize_text(item) for item in non_complete_list}
+  # Skip falsy entries: they are unused input slots, not missing columns
+  return [item for item in complete_list
+          if item and normalize_text(item) not in normalized_found]
+# Returns the columns the code failed to locate on the schedule
+def check_missing(dfs, user_patterns):
+  """
+  Report which entries of ``user_patterns`` could not be located in ``dfs``.
+
+  Every dataframe in ``dfs`` is searched with ``flexible_search`` for the
+  non-empty main and secondary patterns, and the cell values at the
+  ``cell_matches`` positions returned by ``analyse_cell_columns`` are
+  collected. The dataframe that produced the most matched cells is then
+  compared with ``user_patterns`` by ``find_missing_columns``.
+
+  dfs: sequence of dataframes (cells are read positionally with ``.iloc``).
+  user_patterns: user input, split by ``separate_main_secondary``.
+
+  Returns the list produced by ``find_missing_columns``. With an empty
+  ``dfs`` nothing can have been found, so the comparison runs against an
+  empty list instead of ``max()`` raising ``ValueError``.
+  """
+  # The search terms depend only on user_patterns, so build them once
+  main_info, secondary_info = separate_main_secondary(user_patterns)
+  non_empty_main_info = [item for item in main_info if item]
+  non_empty_secondary_info = [item for item in secondary_info if item]
+  non_empty_info = non_empty_main_info + non_empty_secondary_info
+  all_words = []
+  for df in dfs:
+    cell_columns_appearance = flexible_search(df, non_empty_info)
+    # Only the cell-level matches are needed here; column matches are ignored
+    cell_matches, _ = analyse_cell_columns(cell_columns_appearance)
+    all_words.append([df.iloc[row, col] for row, col in cell_matches])
+  # Keep the table with the most matched cells; default covers an empty dfs
+  found_words = max(all_words, key=len, default=[])
+  print(found_words)
+  return find_missing_columns(user_patterns, found_words)
 # get the index of dataframe that has the maximum column matches in the dfs from model table detection
 def get_df_index(dfs, user_patterns):
   df_matches = []
 # If the value has a fractional part (e.g. 0.5) it is written as-is; otherwise the decimal point is dropped
 def get_width_info_tobeprinted(new_data):
   width_info_tobeprinted = []
+  if len(new_data[0]) < 4:
+    for _,_,_, in new_data:
+      width_info_tobeprinted.append("N/A mm wide x N/A mm high")
   if len(new_data[0]) == 4:
     for _,_,_, w in new_data:
       #w = re.sub(r",", "", w)
       h = re.sub(r",", "", h)
       #if w == "N/A":
+      #if w.isalpha():
+      if is_not_number(w):
         w = w
       else:
         if float(w).is_integer():
         else:
           w = w
       #if h == "N/A":
+      #if h.isalpha():
+      if is_not_number(h):
         h = h
       else:
         if float(h).is_integer():
   return widths
+def is_not_number(s: str) -> bool:
+    """
+    Return True when ``s`` cannot be converted with ``float()``.
+
+    Anything ``float()`` accepts counts as a number: ints, decimals,
+    scientific notation, surrounding whitespace, and also "nan"/"inf".
+    Values ``float()`` rejects with ``TypeError`` (e.g. ``None``) are
+    reported as not-a-number rather than propagating the error.
+    """
+    try:
+        float(s)
+    except (TypeError, ValueError):
+        # ValueError: unparsable text; TypeError: not a string or number at all
+        return True
+    return False
 def get_width_info_tobeprinted_secondary(new_data, main_info, secondary_info):
   width_info_tobeprinted = []
   if len(main_info) == 2 and len(secondary_info) == 1:
    for coords, label, acous, color in new_data:
       secondary_info_tobeprinted.append(acous)
+      width_info_tobeprinted.append("N/A mm wide x N/A mm high")
   if len(main_info) == 2 and len(secondary_info) == 2:
     for coords, label, acous, fire, color in new_data:
        secondary_info_tobeprinted.append((acous, fire))
+       width_info_tobeprinted.append("N/A mm wide x N/A mm high")
   if len(main_info) == 3 and len(secondary_info) == 1:
     for coords, label, width, acous, color in new_data:
     for coords, label, width, height, acous, color in new_data:
       w = re.sub(r",", "", width)
       h = re.sub(r",", "", height)
+      #if w.isalpha():
+      if is_not_number(w):
         w = w
       else:
         if float(w).is_integer():
         else:
           w = w
       #if h == "N/A":
+      #if h.isalpha():
+      if is_not_number(h):
         h = h
       else:
         if float(h).is_integer():
         else:
           h = h
       width_info_tobeprinted.append(f"{w} mm wide x {h} mm high")
+      secondary_info_tobeprinted.append((acous))
   if len(main_info) == 4 and len(secondary_info) == 2:
     for coords, label, width, height, acous, fire, color in new_data:
       w = re.sub(r",", "", width)
       h = re.sub(r",", "", height)
       #if w == "N/A":
+      #if w.isalpha():
+      if is_not_number(w):
         w = w
       else:
         if float(w).is_integer():
         else:
           w = w
       #if h == "N/A":
+      #if h.isalpha():
+      if is_not_number(h):
         h = h
       else:
         if float(h).is_integer():
     return flattened_list
+def get_flattened_tuples_list_no_doortype(selected_columns):
+  """
+  Turn each row of ``selected_columns`` into a tuple with a fixed colour appended.
+
+  selected_columns: dataframe whose rows are read with ``itertuples``
+    (index excluded, plain tuples).
+
+  Returns a list with one tuple per row: the row's values followed by the
+  constant colour tuple ``(0, 0, 255)`` as the last element.
+  """
+  default_color = (0,0,255)
+  rows = selected_columns.itertuples(name=None, index=False)
+  # Every row gets the same colour because there is no door type to colour by
+  return [row + (default_color,) for row in rows]
 #SECONDARY
 def get_cleaned_data_secondary(locations, main_info, secondary_info):
   return new_data
 def merge_pdf_bytes_list(pdfs):
     writer = PdfWriter()
     return output_stream.read()
 def calculate_bounding_rect_count(vertices,padding):
     x, y = vertices[0]
     return '#{:02X}{:02X}{:02X}'.format(int(r * 255), int(g * 255), int(b * 255))
 def generate_annotation_xml_block_count(vertices, area_text, author, custom_data: dict, column_order: list, index: int,
                                    label: str = '',height:str='',width:str='',
                                    color:str='',countstyle:str='',countsize:str=''):
   mirrored = []
   mirrored.append([x, height_plan - y])
   return mirrored
+def point_mupdf_to_pdf(x, y, page):
+    """
+    Convert a point by flipping its y value and offsetting by the page mediabox.
+
+    x, y: coordinates of the point to convert.
+    page: object exposing ``rect.height``, ``mediabox.x0`` and ``mediabox.y0``
+      (presumably a PyMuPDF page; confirm against the caller).
+
+    Returns ``[[pdf_x, pdf_y]]`` (a one-element list holding the converted
+    point) where ``pdf_x = mediabox.x0 + x`` and
+    ``pdf_y = mediabox.y0 + (rect.height - y)``.
+    """
+    page_height = float(page.rect.height)
+    box = page.mediabox
+    # Flip y against the page height, then shift both axes by the mediabox origin
+    flipped_y = page_height - y
+    return [[box.x0 + x, box.y0 + flipped_y]]
 # Modified to adjust mirrored points
 def create_bb_bax_secondary(new_data, widthat, heightat, secondary_tobeprinted, CountStyles, input_user_clmn_names, page_number, height_plan):
     R = str(float(r/255))
     G = str(float(g/255))
     B = str(float(b/255))
+    #vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
+    vertix = point_mupdf_to_pdf(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
     if input_user_clmn_names[4] and input_user_clmn_names[5]:
         bax_annotations.append({
           'vertices': vertix,
               'vertices': vertix,
               'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
               'author': 'ADR',
+              'custom_data': {'FireRating': secondary_tobeprinted[i], 'AcousticRating': 'N/A', 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
               'label': new_data[i][1], #change label to whatever u want
               'Height': heightat[i],  #for tameem to change - i added any values'
               'Width':widthat[i],
               'vertices': vertix,
               'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
               'author': 'ADR',
+              'custom_data': {'FireRating': 'N/A', 'AcousticRating': secondary_tobeprinted[i], 'Height_': heightat[i],'Width_': widthat[i]} , #identify custom colums here as( Column name: Text to add )
               'label': new_data[i][1], #change label to whatever u want
               'Height': heightat[i],  #for tameem to change - i added any values'
               'Width':widthat[i],
     G = str(float(g/255))
     B = str(float(b/255))
+    #vertix = mirrored_points(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
+    vertix = point_mupdf_to_pdf(new_data[i][0][0][0], new_data[i][0][0][1], height_plan)
     bax_annotations.append({
           'vertices': vertix,
           'text': '1', #number of counts in one time (in markup written as count 1) -> if u want to change it we can look for a way
   return bax_annotations
 #Handle missing widths or heights in some rows
 def generate_separate_dimensions(widths):
   widthat = []
     return tostring(root, encoding="unicode", method="xml")
 def mainRun(schedule, plan, searcharray):
     print("mainRun is RUNNING")
     p1_type = type(plan[0])
     print(f"el mawgood fe p[0]: {p1_type}")
+    print(f"length of search array: {len(searcharray)}")
     #dfs = extract_tables(schedule)
     print(f"type of schedule: {type(schedule)}")
+    print(f"length of schedules: {len(schedule)}")
     pdf_widths = []
     pdf_heights = []
     pdfs_count_type = []
     page_number = 0
     bax_annotations_all_inputs = [] #for the same plan
     #pdfs = []
+    not_found_list = []
+    repeated_labels_list = []
+    missings = []
     for p in plan:
         annotation_counter +=1
         page_number +=1
         width_plan = page.cropbox.width   # or: width = rect.x1 - rect.x0
         height_plan = page.cropbox.height # or: height = rect.y1 - rect.y0
+        #width_plan = math.ceil(width_plan)
+        #height_plan = math.ceil(height_plan)
+        for k in range(len(schedule)):
+          dfs = extract_tables_model(schedule[k])
+          user_input_this_schedule = searcharray[k]
+          for j in range(len(user_input_this_schedule)):
+            user_input = user_input_this_schedule[j]
+            secondary_presence = False
+            if user_input[4] or user_input[5]:
+              secondary_presence = True
+              main_info_, secondary_info_ = separate_main_secondary(user_input)
+              main_info = [item for item in main_info_ if item]
+              secondary_info = [item for item in secondary_info_ if item]
+              print("feh secondary information")
+              if user_input[4]:
+                print("Fire rate mawgooda")
+              if user_input[5]:
+                print("Acoustic Rate mawgooda")
+            else:
+              print("mafeesh secondary information")
+            selected_columns_combined = get_selected_columns_all(dfs, user_input)
+            if selected_columns_combined is None:
+                dfs_normal = extract_tables(schedule[k])
+                column_indices = get_column_indices_from_dfs_normal(dfs_normal, user_input)
+                if column_indices is None:
+                   missing_clmns = check_missing(dfs, user_input)
+                   missing_message = f"{missing_clmns} can't be extracted from table input {j+1} in schedule {k+1}"
+                   missings.append(missing_message)
+                   continue # continue to the next user input
+                if len(dfs) == 1:
+                  selected_columns_combined = get_selected_columns_by_index(dfs[0], column_indices, user_input)
+                if len(dfs) > 1:
+                  index_df = get_df_index(dfs, user_input)
+                  selected_columns_combined = get_selected_columns_by_index(dfs[index_df], column_indices, user_input)
+            selected_columns_combined = selected_columns_combined.applymap(lambda x: 'N/A' if isinstance(x, str) and x.strip() == '' else x)
+            selected_columns_combined = selected_columns_combined.fillna('N/A')
+            selected_columns_combined = selected_columns_combined.replace(r'(?i)\bn/a\b', 'N/A', regex=True)
+            kelma = get_st_op_pattern(selected_columns_combined, user_input)
+            if "door_type" in selected_columns_combined.columns:
+              col_dict = get_similar_colors_all(selected_columns_combined)
+              flattened_list = get_flattened_tuples_list_all(col_dict)
+            else:
+               if secondary_presence:
+                  main_info = main_info + [""]
+               flattened_list = get_flattened_tuples_list_no_doortype(selected_columns_combined)
+            plan_texts = read_text(p)
+            if secondary_presence:
+                    locations, not_found = get_word_locations_plan_secondary(flattened_list,plan_texts, main_info, secondary_info)
+                    not_found_list.append(not_found)
+                    new_data3 = get_cleaned_data_secondary(locations,main_info,secondary_info)
+                    repeated_labels = get_repeated_labels(locations)
+                    repeated_labels = list(repeated_labels)
+                    repeated_labels_list.append(repeated_labels)
+                    if kelma == None:
+                        widths, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
+                    else:
+                        width_info_tobeprinted, secondary_tobeprinted = get_width_info_tobeprinted_secondary(new_data3, main_info, secondary_info)
+                        cleaned_width = get_cleaned_width(width_info_tobeprinted)
+                        widths = get_widths_bb_format(cleaned_width, kelma)
+                    #Count type annotation
+                    widht_count, height_count = generate_separate_dimensions(widths)
+                    bax = create_bb_bax_secondary(new_data3, widht_count, height_count, secondary_tobeprinted, CountStyles, user_input, page_number, page)
+                    bax_annotations_all_inputs.append(bax)
+            else:
+                locations, not_found = get_word_locations_plan(flattened_list,plan_texts)
+                not_found_list.append(not_found)
+                new_data = get_cleaned_data(locations)
+                if len(new_data) == 0:
+                    continue
+                repeated_labels = get_repeated_labels(locations)
+                repeated_labels = list(repeated_labels)
+                repeated_labels_list.append(repeated_labels)
+                if kelma == None:
+                    widths = get_width_info_tobeprinted(new_data)
+                else:
+                    width_info_tobeprinted = get_width_info_tobeprinted(new_data)
+                    cleaned_width = get_cleaned_width(width_info_tobeprinted)
+                    widths = get_widths_bb_format(cleaned_width, kelma)
+                #count type annotation
+                widht_count, height_count = generate_separate_dimensions(widths)
+                bax = create_bb_bax(new_data, widht_count, height_count, CountStyles, page_number, page)
+                bax_annotations_all_inputs.append(bax)
         # if it is not byte type
         #pdfs_count_type.append(convert_to_bytes(p))
         pdfs_count_type.append(p)
     column_order = ['FireRating', 'AcousticRating', 'Height_', 'Width_']
+    ## Getting the not found in all plans
+    flattened_not_found_list = [item for sublist in not_found_list for item in sublist]
+    counts_not_found = Counter(flattened_not_found_list)
+    not_found_any_plan = []
+    for key, value in counts_not_found.items():
+      if value == len(pdfs_count_type):
+        not_found_any_plan.append(key)
+    flattened_repeated_labels_list = [item for sublist in repeated_labels_list for item in sublist]
     pretty_xml = save_multiple_annotations_count_bax(bax_annotation, 'count_type_Windows.bax', column_order,pdf_widths,pdf_heights,page_number)
     column_xml = generate_bluebeam_columns_raw(column_order)
+    repeated_labels = flattened_repeated_labels_list
     ##### SHOULD return pretty_xml, column_xml, merged_pdf
+    not_found = not_found_any_plan
     annotatedimgs=[]
     doc2 =fitz.open('pdf',merged_pdf)
     len_doc2 = len(doc2)
                 v='stroke'
               x,y,z=int(annot_color.get(v)[0]*255),int(annot_color.get(v)[1]*255),int(annot_color.get(v)[2]*255)
               list1.loc[len(list1)] =[annot.info['content'],annot.info['id'],annot.info['subject'],[x,y,z]]
+    return annotatedimgs, doc2 , list1, repeated_labels , not_found, pretty_xml, column_xml