# Source: LatestDuplicate_Working / Legend_Detection.py
# Last commit: "Update Legend_Detection.py" (95234c6, verified) -- Marthee
# """## Imports"""
import sys
import math
import random
import string
import zlib
import base64
import datetime
import uuid
import re
from io import BytesIO
from ctypes import sizeof
from collections import Counter
from typing import NewType
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, tostring, ElementTree
from xml.dom.minidom import parseString
import numpy as np
import cv2
from matplotlib import pyplot as plt
from matplotlib.patches import Polygon
from shapely.geometry import Point, Polygon as ShapelyPolygon
from shapely.ops import unary_union
from PIL import Image, ImageDraw, ImageFont, ImageColor
import fitz
import ezdxf
from ezdxf import units, bbox
from ezdxf.colors import aci2rgb
from ezdxf.math import OCS, Matrix44, Vec3, Vec2
import pandas as pd
# import google_sheet_Legend
# import tsadropboxretrieval
from PyPDF2 import PdfReader, PdfWriter
from PyPDF2.generic import (
NameObject,
TextStringObject,
DictionaryObject,
ArrayObject,
FloatObject,
NumberObject,
)
from math import sin, cos, radians, isclose
# AutoCAD Color Index (ACI) -> (R, G, B) lookup table.
# Built once at import time instead of on every aci_to_rgb() call.
_ACI_RGB_MAP = {
    0: (0, 0, 0),
    1: (255, 0, 0),
    2: (255, 255, 0),
    3: (0, 255, 0),
    4: (0, 255, 255),
    5: (0, 0, 255),
    6: (255, 0, 255),
    7: (255, 255, 255),
    8: (65, 65, 65),
    9: (128, 128, 128),
    10: (255, 0, 0),
    11: (255, 170, 170),
    12: (189, 0, 0),
    13: (189, 126, 126),
    14: (129, 0, 0),
    15: (129, 86, 86),
    16: (104, 0, 0),
    17: (104, 69, 69),
    18: (79, 0, 0),
    19: (79, 53, 53),
    20: (255, 63, 0),
    21: (255, 191, 170),
    22: (189, 46, 0),
    23: (189, 141, 126),
    24: (129, 31, 0),
    25: (129, 96, 86),
    26: (104, 25, 0),
    27: (104, 78, 69),
    28: (79, 19, 0),
    29: (79, 59, 53),
    30: (255, 127, 0),
    31: (255, 212, 170),
    32: (189, 94, 0),
    33: (189, 157, 126),
    34: (129, 64, 0),
    35: (129, 107, 86),
    36: (104, 52, 0),
    37: (104, 86, 69),
    38: (79, 39, 0),
    39: (79, 66, 53),
    40: (255, 191, 0),
    41: (255, 234, 170),
    42: (189, 141, 0),
    43: (189, 173, 126),
    44: (129, 96, 0),
    45: (129, 118, 86),
    46: (104, 78, 0),
    47: (104, 95, 69),
    48: (79, 59, 0),
    49: (79, 73, 53),
    50: (255, 255, 0),
    51: (255, 255, 170),
    52: (189, 189, 0),
    53: (189, 189, 126),
    54: (129, 129, 0),
    55: (129, 129, 86),
    56: (104, 104, 0),
    57: (104, 104, 69),
    58: (79, 79, 0),
    59: (79, 79, 53),
    60: (191, 255, 0),
    61: (234, 255, 170),
    62: (141, 189, 0),
    63: (173, 189, 126),
    64: (96, 129, 0),
    65: (118, 129, 86),
    66: (78, 104, 0),
    67: (95, 104, 69),
    68: (59, 79, 0),
    69: (73, 79, 53),
    70: (127, 255, 0),
    71: (212, 255, 170),
    72: (94, 189, 0),
    73: (157, 189, 126),
    74: (64, 129, 0),
    75: (107, 129, 86),
    76: (52, 104, 0),
    77: (86, 104, 69),
    78: (39, 79, 0),
    79: (66, 79, 53),
    80: (63, 255, 0),
    81: (191, 255, 170),
    82: (46, 189, 0),
    83: (141, 189, 126),
    84: (31, 129, 0),
    85: (96, 129, 86),
    86: (25, 104, 0),
    87: (78, 104, 69),
    88: (19, 79, 0),
    89: (59, 79, 53),
    90: (0, 255, 0),
    91: (170, 255, 170),
    92: (0, 189, 0),
    93: (126, 189, 126),
    94: (0, 129, 0),
    95: (86, 129, 86),
    96: (0, 104, 0),
    97: (69, 104, 69),
    98: (0, 79, 0),
    99: (53, 79, 53),
    100: (0, 255, 63),
    101: (170, 255, 191),
    102: (0, 189, 46),
    103: (126, 189, 141),
    104: (0, 129, 31),
    105: (86, 129, 96),
    106: (0, 104, 25),
    107: (69, 104, 78),
    108: (0, 79, 19),
    109: (53, 79, 59),
    110: (0, 255, 127),
    111: (170, 255, 212),
    112: (0, 189, 94),
    113: (126, 189, 157),
    114: (0, 129, 64),
    115: (86, 129, 107),
    116: (0, 104, 52),
    117: (69, 104, 86),
    118: (0, 79, 39),
    119: (53, 79, 66),
    120: (0, 255, 191),
    121: (170, 255, 234),
    122: (0, 189, 141),
    123: (126, 189, 173),
    124: (0, 129, 96),
    125: (86, 129, 118),
    126: (0, 104, 78),
    127: (69, 104, 95),
    128: (0, 79, 59),
    129: (53, 79, 73),
    130: (0, 255, 255),
    131: (170, 255, 255),
    132: (0, 189, 189),
    133: (126, 189, 189),
    134: (0, 129, 129),
    135: (86, 129, 129),
    136: (0, 104, 104),
    137: (69, 104, 104),
    138: (0, 79, 79),
    139: (53, 79, 79),
    140: (0, 191, 255),
    141: (170, 234, 255),
    142: (0, 141, 189),
    143: (126, 173, 189),
    144: (0, 96, 129),
    145: (86, 118, 129),
    146: (0, 78, 104),
    147: (69, 95, 104),
    148: (0, 59, 79),
    149: (53, 73, 79),
    150: (0, 127, 255),
    151: (170, 212, 255),
    152: (0, 94, 189),
    153: (126, 157, 189),
    154: (0, 64, 129),
    155: (86, 107, 129),
    156: (0, 52, 104),
    157: (69, 86, 104),
    158: (0, 39, 79),
    159: (53, 66, 79),
    160: (0, 63, 255),
    161: (170, 191, 255),
    162: (0, 46, 189),
    163: (126, 141, 189),
    164: (0, 31, 129),
    165: (86, 96, 129),
    166: (0, 25, 104),
    167: (69, 78, 104),
    168: (0, 19, 79),
    169: (53, 59, 79),
    170: (0, 0, 255),
    171: (170, 170, 255),
    172: (0, 0, 189),
    173: (126, 126, 189),
    174: (0, 0, 129),
    175: (86, 86, 129),
    176: (0, 0, 104),
    177: (69, 69, 104),
    178: (0, 0, 79),
    179: (53, 53, 79),
    180: (63, 0, 255),
    181: (191, 170, 255),
    182: (46, 0, 189),
    183: (141, 126, 189),
    184: (31, 0, 129),
    185: (96, 86, 129),
    186: (25, 0, 104),
    187: (78, 69, 104),
    188: (19, 0, 79),
    189: (59, 53, 79),
    190: (127, 0, 255),
    191: (212, 170, 255),
    192: (94, 0, 189),
    193: (157, 126, 189),
    194: (64, 0, 129),
    195: (107, 86, 129),
    196: (52, 0, 104),
    197: (86, 69, 104),
    198: (39, 0, 79),
    199: (66, 53, 79),
    200: (191, 0, 255),
    201: (234, 170, 255),
    202: (141, 0, 189),
    203: (173, 126, 189),
    204: (96, 0, 129),
    205: (118, 86, 129),
    206: (78, 0, 104),
    207: (95, 69, 104),
    208: (59, 0, 79),
    209: (73, 53, 79),
    210: (255, 0, 255),
    211: (255, 170, 255),
    212: (189, 0, 189),
    213: (189, 126, 189),
    214: (129, 0, 129),
    215: (129, 86, 129),
    216: (104, 0, 104),
    217: (104, 69, 104),
    218: (79, 0, 79),
    219: (79, 53, 79),
    220: (255, 0, 191),
    221: (255, 170, 234),
    222: (189, 0, 141),
    223: (189, 126, 173),
    224: (129, 0, 96),
    225: (129, 86, 118),
    226: (104, 0, 78),
    227: (104, 69, 95),
    228: (79, 0, 59),
    229: (79, 53, 73),
    230: (255, 0, 127),
    231: (255, 170, 212),
    232: (189, 0, 94),
    233: (189, 126, 157),
    234: (129, 0, 64),
    235: (129, 86, 107),
    236: (104, 0, 52),
    237: (104, 69, 86),
    238: (79, 0, 39),
    239: (79, 53, 66),
    240: (255, 0, 63),
    241: (255, 170, 191),
    242: (189, 0, 46),
    243: (189, 126, 141),
    244: (129, 0, 31),
    245: (129, 86, 96),
    246: (104, 0, 25),
    247: (104, 69, 78),
    248: (79, 0, 19),
    249: (79, 53, 59),
    250: (51, 51, 51),
    251: (80, 80, 80),
    252: (105, 105, 105),
    253: (130, 130, 130),
    254: (190, 190, 190),
    255: (255, 255, 255),
}


def aci_to_rgb(aci):
    """Convert an AutoCAD Color Index to an (R, G, B) tuple.

    Args:
        aci: Colour index to look up; the table covers 0 through 255.

    Returns:
        The (R, G, B) tuple for *aci*, or white ``(255, 255, 255)`` when the
        index is not in the table (e.g. 256 or a negative value).
    """
    # Default to white if index is invalid or not found
    return _ACI_RGB_MAP.get(aci, (255, 255, 255))
def int_to_rgb(color_int):
    """Split a packed 0xRRGGBB integer into its (R, G, B) byte components.

    Bits above the low 24 are ignored because each channel is masked to
    8 bits after shifting.
    """
    return tuple((color_int >> shift) & 0xFF for shift in (16, 8, 0))
def get_hatch_color(entity):
    """Resolve the colour of a DXF entity to an RGB tuple.

    Resolution order:
      1. The entity's own ``true_color`` attribute, if present.
      2. The entity's ACI colour index when it is an explicit colour (1-255).
      3. The colour of the entity's layer when the index is 256 (BYLAYER,
         the DXF default) or 0 (BYBLOCK, approximated by the layer colour
         as the original code did).
      4. White as a fallback.

    Args:
        entity: An ezdxf entity (a HATCH in this module), or a falsy value.

    Returns:
        A ``(rgb, source)`` pair where ``rgb`` is an ``(R, G, B)`` tuple and
        ``source`` is one of ``'true_color'``, ``'White true_color'``,
        ``'aci'``, ``'bylayer'`` or ``'default'``.
    """
    white = (255, 255, 255)
    if not entity:
        return white, 'default'

    # Check for true color
    if entity.dxf.hasattr('true_color'):
        rgb_color = int_to_rgb(entity.dxf.true_color)  # Convert integer to (R, G, B)
        if rgb_color == white:
            return rgb_color, 'White true_color'
        return rgb_color, 'true_color'

    # Check for an explicit colour index
    color_index = entity.dxf.color
    if 1 <= color_index <= 255:
        return aci_to_rgb(color_index), 'aci'

    # 256 is BYLAYER and 0 is BYBLOCK in DXF; both are resolved via the layer.
    if color_index in (0, 256):
        layer_name = entity.dxf.layer
        doc = entity.doc
        layer = None
        # has_entry() avoids the exception ezdxf raises from get() for a
        # layer name that has no entry in the layer table.
        if doc is not None and doc.layers.has_entry(layer_name):
            layer = doc.layers.get(layer_name)
        if layer is not None:
            if layer.dxf.hasattr('true_color'):
                return int_to_rgb(layer.dxf.true_color), 'bylayer'
            # A layer that is switched off stores its colour index negated.
            return aci_to_rgb(abs(layer.dxf.color)), 'bylayer'
        return white, 'default'

    # Default
    return white, 'default'
def calculate_distance(pt1, pt2):
    """Return the planar Euclidean distance between two points.

    Only index 0 (x) and index 1 (y) of each point are read; any further
    components are ignored.
    """
    return math.hypot(pt2[0] - pt1[0], pt2[1] - pt1[1])
def dedupe_colors_preserve_order(hatchcolor):
    """Return the distinct colours of *hatchcolor* as tuples, in first-seen order.

    Each entry may be a colour wrapped in a one-element list/tuple
    (``[(r, g, b)]``), a list (``[r, g, b]``) or already a tuple
    (``(r, g, b)``); all are normalised to a tuple before comparison.
    """
    def as_color(entry):
        is_wrapped = (
            isinstance(entry, (list, tuple))
            and len(entry) == 1
            and isinstance(entry[0], (list, tuple))
        )
        if is_wrapped:
            return tuple(entry[0])
        return entry if isinstance(entry, tuple) else tuple(entry)

    # dict keys keep insertion order, so the first occurrence of each colour wins.
    return list(dict.fromkeys(as_color(entry) for entry in hatchcolor))
def remove_existing_colors(unique_colors, filtered_items):
    """Return the colours of *unique_colors* that no row of *filtered_items* uses.

    The colour of a row is taken from its last element. Empty rows and rows
    whose colour is ``None`` are ignored, as are colours that cannot be
    converted to a tuple. The result keeps the order of *unique_colors* and
    every returned colour is a tuple.
    """
    taken = set()
    for row in filtered_items:
        color = row[-1] if row else None
        if color is None:
            continue
        if isinstance(color, (list, tuple)):
            taken.add(tuple(color))
            continue
        # Unexpected type: best-effort conversion, skipped when it fails.
        try:
            taken.add(tuple(color))
        except Exception:
            pass

    normalized = (c if isinstance(c, tuple) else tuple(c) for c in unique_colors)
    return [color for color in normalized if color not in taken]
def _entity_text(entity):
    """Return the stripped text content of a TEXT/MTEXT entity ('' if none)."""
    if hasattr(entity, 'text'):
        raw = entity.text
    else:
        # NOTE(review): ezdxf TEXT entities appear to keep their string in
        # dxf.text rather than a .text attribute; without this fallback such
        # entities always read as "" and can never match a search prefix.
        raw = entity.dxf.get('text', '')
    return (raw or '').strip()


def _collect_hatches(msp):
    """Return ``[(rgb, centroids), ...]`` for every HATCH with boundary paths.

    ``centroids`` holds the vertex average of each polyline boundary path of
    the hatch (it may be empty when the hatch has no polyline paths).
    """
    hatches = []
    for hatch in msp.query('HATCH'):
        paths = list(hatch.paths)
        if not paths:
            continue
        centroids = []
        for path in paths:
            if path.type != 1:  # 1 == polyline boundary path
                continue
            vertices = [v[:2] for v in path.vertices]
            if not vertices:
                continue  # an empty boundary would divide by zero below
            count = len(vertices)
            centroids.append((
                sum(v[0] for v in vertices) / count,
                sum(v[1] for v in vertices) / count,
            ))
        rgb, _source = get_hatch_color(hatch)
        hatches.append((rgb, centroids))
    return hatches


def _nearest_hatch_color(point, hatches):
    """Return the colour picked for *point* from the pre-computed *hatches*.

    Hatches are scanned in order. Whenever a path centroid is closer than
    anything seen so far the minimum distance is updated; the hatch colour is
    adopted only if it is not white, after which the remaining paths of that
    hatch are skipped. White is returned when no coloured hatch qualifies.
    """
    white = (255, 255, 255)
    min_distance = float('inf')
    detected_color = white
    for rgb, centroids in hatches:
        for centroid in centroids:
            distance = calculate_distance(point, centroid)
            if distance < min_distance:
                min_distance = distance
                if rgb != white:  # Valid color found
                    detected_color = rgb
                    break  # Stop checking further paths for this hatch
    return detected_color


def _find_aligned_label(labelled, prefix, x, y):
    """Return the first text starting with *prefix* that shares x or y, else None."""
    for text, entity in labelled:
        if not text.startswith(prefix):
            continue
        position = entity.dxf.insert
        if x == position.x or y == position.y:
            return text
    return None


def Legend_Detection(datadoc, dxfile, SearchArray, pdf_content=0):
    """Pair legend texts in a DXF file with the colour of a nearby hatch.

    For every row ``[prefix, nbs_prefix, length]`` of *SearchArray* the TEXT
    and MTEXT entities of the modelspace are scanned:

    * ``prefix`` and ``length`` set: texts starting with ``prefix`` whose
      length equals ``int(length)`` are matched. If ``nbs_prefix`` is also
      set, the first text starting with ``nbs_prefix`` that has the same x or
      the same y insert coordinate is recorded alongside it.
    * only ``prefix`` set: every text starting with ``prefix`` is matched.
    * ``prefix`` empty: the row is skipped.

    Matches are then grouped by text; one entry per text is kept, preferring
    an entry that has an associated NBS text. Finally one ``'Hatch'`` row is
    appended for every distinct hatch colour not already used by a match.

    Args:
        datadoc: Unused by the current implementation; kept for callers.
        dxfile: Path of the DXF file, passed to ``ezdxf.readfile``.
        SearchArray: Sequence of ``[prefix, nbs_prefix, length]`` rows, or a
            falsy value to collect hatch colours only.
        pdf_content: Unused by the current implementation; kept for callers.

    Returns:
        A list of ``[text, textNBS, (x, y), rgb]`` rows followed by
        ``['Hatch', None, None, rgb]`` rows.

    Raises:
        ValueError: If a row's ``length`` is set but not convertible by
            ``int()``. Errors from ``ezdxf.readfile`` propagate unchanged.
    """
    doc = ezdxf.readfile(dxfile)
    doc.header['$MEASUREMENT'] = 1
    msp = doc.modelspace()

    # Hatch colours/centroids and text strings do not depend on the search
    # row, so they are gathered once instead of once per matching text.
    hatches = _collect_hatches(msp)
    labelled = [(_entity_text(entity), entity) for entity in msp.query('TEXT MTEXT')]

    text_with_positions = []
    if SearchArray:
        for row in SearchArray:
            prefix, nbs_prefix, length = row[0], row[1], row[2]
            print("SearchArray[i][0] = ", prefix)
            print("SearchArray[i][1] = ", nbs_prefix)
            print("SearchArray[i][2] = ", length)
            if not prefix:
                continue

            find_nbs = bool(nbs_prefix and length)
            if find_nbs:
                print("First IF")
            elif length:
                print("Second IF")
            else:
                print("Third IF")
            wanted_length = int(length) if length else None

            for text, entity in labelled:
                if not text.startswith(prefix):
                    continue
                if wanted_length is not None and len(text) != wanted_length:
                    continue
                position = entity.dxf.insert  # Extract text position
                x, y = position.x, position.y
                # Always (re)assigned per match so a value from an earlier
                # text can never leak into this entry.
                textNBS = _find_aligned_label(labelled, nbs_prefix, x, y) if find_nbs else None
                detected_color = _nearest_hatch_color((x, y), hatches)
                text_with_positions.append([text, textNBS, (x, y), detected_color])

    # Group entries by the text value (first element)
    print("Grouping")
    grouped = {}
    for entry in text_with_positions:
        grouped.setdefault(entry[0], []).append(entry)

    # Per text keep the first entry that has an NBS text, else the first entry.
    filtered_results = [
        next((entry for entry in entries if entry[1] is not None), entries[0])
        for entries in grouped.values()
    ]
    for text, textNbs, position, detected_color in filtered_results:
        print(f"Text: {text}, Text Nbs: {textNbs}, Position: {position}, Nearest Hatch Color: {detected_color}")

    # Add one generic row per hatch colour that no matched text already uses.
    unique_colors = dedupe_colors_preserve_order([rgb for rgb, _centroids in hatches])
    unique_new = remove_existing_colors(unique_colors, filtered_results)
    # extend() rather than append-then-flatten: the latter left a stray []
    # in the result whenever there were no new colours.
    filtered_results.extend(['Hatch', None, None, color] for color in unique_new)

    print(filtered_results)
    return filtered_results