Spaces:

kartai
/

CAD-AID

Runtime error

CAD-AID / src /utils /cadaid_postprocess.py

Julia Jørstad

First version

452a352 11 months ago

7.21 kB

	import pandas as pd
	import numpy as np
	import cv2
	from shapely import Point, Polygon
	from shapely.geometry import box
	from shapely.wkt import dumps

	def mask_to_polygons(mask):
	    """
	    Convert a segmentation mask to a single shapely polygon.

	    Args:
	    - mask (numpy array): mask that is cast to uint8 and passed to
	      cv2.findContours (presumably a 2-D binary mask - confirm with caller).
	    Returns:
	    - shapely Polygon built from the first external contour that has at least
	      three points, or None when no such contour exists.
	    """
	    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    for contour in contours:
	        # reshape keeps the points two-dimensional even for a one-point
	        # contour, where squeeze() would collapse them to a flat (x, y) pair.
	        points = contour.reshape(-1, 2)
	        # Polygon raises for fewer than three vertices, so such degenerate
	        # contours (e.g. single-pixel specks) are skipped instead of crashing.
	        if len(points) >= 3:
	            return Polygon(points)
	    return None

	def calculate_centroid(bbox):
	    """
	    Return the centre point of a bounding box.

	    Args:
	    - bbox (sequence of 4 numbers): corner coordinates in the order x1, y1, x2, y2.
	    Returns:
	    - tuple: (cx, cy), the midpoint between the two corners.
	    """
	    left, top, right, bottom = bbox
	    return ((left + right) / 2, (top + bottom) / 2)


	def text_in_segmentation(df_ocr, df_masks, box_col="box", text_col="text"):
	    """
	    Collects, for every segmentation mask, the OCR texts whose bounding-box
	    centre lies inside the mask polygon. E.g. "Soverom", "Bad".

	    Args:
	    - df_ocr (Pandas DataFrame): OCR results; `text_col` holds the text and
	      `box_col` holds the bounding box as (x1, y1, x2, y2).
	    - df_masks (Pandas DataFrame): one row per mask with a 'mask_id' column and
	      a 'polygon' column that is passed to shapely's Polygon constructor.
	    - box_col (str): name of the bounding-box column in df_ocr.
	    - text_col (str): name of the text column in df_ocr.
	    Returns:
	    - Pandas DataFrame: one row per mask that contains at least one text, with
	      "text" (list of texts), "box" (list of the matching OCR boxes) and
	      "mask_id". An empty frame with columns [text_col, box_col, "mask_id"] is
	      returned when an input is missing/empty or when nothing matches.
	    """
	    empty = pd.DataFrame(columns=[text_col, box_col, "mask_id"])
	    if df_ocr is None or df_ocr.empty or df_masks is None or df_masks.empty:
	        return empty

	    # The OCR centroids do not depend on the mask, so build them once instead
	    # of once per mask.
	    ocr_items = [
	        (text, ocr_box, Point(*calculate_centroid(ocr_box)))
	        for text, ocr_box in zip(df_ocr[text_col], df_ocr[box_col])
	    ]

	    results = []
	    for mask_row in df_masks.itertuples(index=False):
	        mask_id = mask_row.mask_id
	        poly = Polygon(mask_row.polygon)

	        inside = [
	            (text, ocr_box)
	            for text, ocr_box, centroid in ocr_items
	            if poly.contains(centroid)
	        ]
	        if inside:
	            results.append({
	                "text": [text for text, _ in inside],
	                "box": [ocr_box for _, ocr_box in inside],
	                "mask_id": mask_id,
	            })

	    # pd.DataFrame([]) would have no columns at all; keep the same schema as
	    # the guard above so callers can always select the columns.
	    if not results:
	        return empty
	    return pd.DataFrame(results)


	def text_in_bboxes(df_ocr, df_objdet, box_col="box", text_col="text"):
	    """
	    Finds the OCR texts whose bounding-box centre lies inside an object
	    detection bounding box (box edges included).

	    Args:
	    - df_ocr (Pandas DataFrame): OCR results; `text_col` holds the text and
	      `box_col` holds the bounding box as (x1, y1, x2, y2).
	    - df_objdet (Pandas DataFrame): object detection results with a 'mask_id'
	      column and a 'bboxes' column holding (x1, y1, x2, y2).
	    - box_col (str): name of the bounding-box column in df_ocr.
	    - text_col (str): name of the text column in df_ocr.

	    Returns:
	    - Pandas DataFrame: one row per (OCR text, detection box) match with
	      "text", "box" (the OCR box) and "mask_id" of the detection. An empty
	      frame with columns [text_col, box_col, "mask_id"] is returned when an
	      input is missing/empty or when nothing matches.
	    """
	    empty = pd.DataFrame(columns=[text_col, box_col, "mask_id"])
	    if df_ocr is None or df_ocr.empty or df_objdet is None or df_objdet.empty:
	        return empty

	    results = []
	    for text, ocr_box in zip(df_ocr[text_col], df_ocr[box_col]):
	        ocr_cx, ocr_cy = calculate_centroid(ocr_box)

	        for objdet_row in df_objdet.itertuples(index=False):
	            xmin, ymin, xmax, ymax = objdet_row.bboxes

	            # Only the OCR centre is tested; the detection box's own centre
	            # is irrelevant for the containment check.
	            if xmin <= ocr_cx <= xmax and ymin <= ocr_cy <= ymax:
	                results.append({
	                    "text": text,
	                    "box": ocr_box,
	                    "mask_id": objdet_row.mask_id,
	                })

	    # pd.DataFrame([]) would have no columns at all; keep the same schema as
	    # the guard above so callers can always select the columns.
	    if not results:
	        return empty
	    return pd.DataFrame(results)

	def oppholdsrom_plantegning(df_oppholdsrom, detected_boxes):
	    """
	    Counts the "Oppholdsrom" OCR rows whose bounding-box centre lies strictly
	    inside at least one detected floorplan box.
	    Used to count the number of bruksenheter in each floorplan.

	    Args:
	    - df_oppholdsrom (Pandas DataFrame): rows with a 'box' column holding
	      (x1, y1, x2, y2).
	    - detected_boxes (iterable): detected boxes, each as (x1, y1, x2, y2).
	    Returns:
	    - int: number of matching rows; 0 when either input is None or empty.
	    """
	    if df_oppholdsrom is None or df_oppholdsrom.empty or detected_boxes is None:
	        return 0

	    # Emptiness is checked on the built list rather than with
	    # `not detected_boxes`, which raises for multi-element NumPy arrays.
	    detected_polygons = [box(x1, y1, x2, y2) for (x1, y1, x2, y2) in detected_boxes]
	    if not detected_polygons:
	        return 0

	    count = 0
	    for ocr_box in df_oppholdsrom["box"]:
	        centroid = Point(*calculate_centroid(ocr_box))
	        # contains() is False for points exactly on the box boundary.
	        if any(polygon.contains(centroid) for polygon in detected_polygons):
	            count += 1

	    return count

	def safe_group_counts(df, key_col, list_col, count_name):
	    """
	    Sums the lengths of the lists in `list_col` for every value of `key_col`.

	    Args:
	    - df (Pandas DataFrame or None): input rows.
	    - key_col (str): column to group by.
	    - list_col (str): column whose values support len() (e.g. lists of texts).
	    - count_name (str): name of the resulting count column.
	    Returns:
	    - Pandas DataFrame: columns [key_col, count_name], one row per key. An
	      empty frame with those two columns is returned when df is None, empty,
	      or lacks either key_col or list_col.
	    """
	    if (
	        df is None
	        or df.empty
	        or key_col not in df.columns
	        or list_col not in df.columns
	    ):
	        return pd.DataFrame({key_col: pd.Series(dtype=int),
	                             count_name: pd.Series(dtype=int)})

	    # Measure every list once, then let pandas add the lengths per key.
	    lengths = df[list_col].map(len)
	    return (
	        lengths
	        .groupby(df[key_col])
	        .sum()
	        .rename(count_name)
	        .reset_index()
	    )


	def floor_levels_duplicates(floor_levels_df):
	    """
	    Drops repeated floor numbers, keeping the first row of each.

	    If a floor number occurs more than once, the floorplan is assumed to
	    belong to that floor, so only one row per floor number is kept.

	    Args:
	    - floor_levels_df (Pandas DataFrame): rows with an 'etasje' (floor number)
	      column.
	    Returns:
	    - Pandas DataFrame: the input frame itself when no floor number repeats,
	      otherwise a new frame with duplicate 'etasje' rows removed.
	    """
	    floor_counts = floor_levels_df['etasje'].value_counts()

	    # value_counts ignores missing values, so rows without a floor number
	    # never trigger the de-duplication on their own.
	    if (floor_counts > 1).any():
	        return floor_levels_df.drop_duplicates(subset='etasje', keep='first')

	    return floor_levels_df

	def floor_levels_logic(floor_levels_df, df_oppholdsrom_filtered, df_mask):
	    """
	    Pairs the "oppholdsrom" rows with floor numbers.

	    Args:
	    - floor_levels_df (Pandas DataFrame): floor labels with an 'etasje' column
	      and, when there are several floors, a 'box' column of
	      (xmin, ymin, xmax, ymax).
	    - df_oppholdsrom_filtered (Pandas DataFrame): rows with 'text' and
	      'mask_id' columns.
	    - df_mask (Pandas DataFrame): masks with 'mask_id' and a 'bboxes' column of
	      (xmin, ymin, xmax, ymax); only used when there are several floors.
	    Returns:
	    - Pandas DataFrame with columns 'text', 'mask_id', 'etasje', or None when
	      floor_levels_df has no rows.

	    NOTE(review): rows are paired purely by position. When the two sides have
	    different lengths the unmatched rows get NaN - confirm this is intended.
	    """
	    antall_etasjer = len(floor_levels_df)

	    if antall_etasjer == 0:
	        # Nothing to pair with; callers have always received None here.
	        return None

	    if antall_etasjer == 1:
	        # One floor and one floorplan. Both sides are re-indexed from 0 so
	        # they line up by position; a filtered frame keeps its original row
	        # labels, which would otherwise not match the floor row and would
	        # produce extra all-NaN rows.
	        rooms = df_oppholdsrom_filtered.reset_index(drop=True)
	        return pd.DataFrame({
	            'text': rooms['text'],
	            'mask_id': rooms['mask_id'],
	            'etasje': floor_levels_df['etasje'].reset_index(drop=True),
	        })

	    # Several floors: order the masks and the floor labels the same way
	    # (by xmin, then ymin) and pair them row by row.
	    result = pd.merge(df_oppholdsrom_filtered, df_mask, on='mask_id', how='inner')
	    result_sorted = _sort_by_bbox_origin(result, 'bboxes')
	    floor_levels_sorted = _sort_by_bbox_origin(floor_levels_df, 'box')

	    return pd.DataFrame({
	        'text': result_sorted['text'],
	        'mask_id': result_sorted['mask_id'],
	        'etasje': floor_levels_sorted['etasje'],
	    })


	def _sort_by_bbox_origin(df, bbox_col):
	    """Return a copy of df with xmin/ymin/xmax/ymax columns expanded from bbox_col, sorted by xmin then ymin and re-indexed from 0."""
	    ordered = df.copy()
	    ordered[['xmin', 'ymin', 'xmax', 'ymax']] = pd.DataFrame(
	        ordered[bbox_col].tolist(),
	        index=ordered.index,
	    )
	    return ordered.sort_values(['xmin', 'ymin']).reset_index(drop=True)






	# Two floors and two floor plans:
	# requires a segmentation mask that contains "oppholdsrom" text.

	# return df with cols: floor_num, text, bbox, mask id