# owlv2 / owlv2_helper_functions.py
# fcxfcx's picture
# Upload 549 files
# 742a3d1 verified
import os
from matplotlib import pyplot as plt
import skimage
from skimage import io as skimage_io
import numpy as np
import re
def rescale_detection_box(boxes, image):
    """Convert normalized centre-format boxes to corner-format pixel boxes.

    Every box is ``(cx, cy, w, h)`` and is scaled by the longer side of
    ``image`` (``max(height, width)``), i.e. the side of the square the image
    would occupy after being padded to a square. No offset is applied, which
    matches padding that is added only at the bottom/right edges, as
    ``preprocess_images`` in this file does.

    Boxes whose centre lies outside the un-padded image area are not filtered
    out here.

    Args:
        boxes: Iterable of ``(cx, cy, w, h)`` boxes.
        image: Object with a ``shape`` attribute whose first two entries are
            height and width. Both 2-D (grayscale) and 3-D (with a channel
            axis) shapes are accepted.

    Returns:
        List of ``(x1, y1, x2, y2)`` tuples, one per input box.
    """
    # Only height and width are needed; slicing keeps 2-D images working.
    h_img, w_img = image.shape[:2]
    size = max(h_img, w_img)

    recovered_boxes = []
    for cx, cy, w, h in boxes:
        # Scale from the normalized square to pixel units.
        cx, cy, w, h = cx * size, cy * size, w * size, h * size
        # Centre/size -> top-left and bottom-right corners.
        recovered_boxes.append(
            (cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2)
        )
    return recovered_boxes
def plot_boxes_on_image(image, text_queries,
                        scores, boxes, labels,
                        filename, score_threshold,
                        output_dir):
    """Draw labelled detection boxes over an image and save the figure.

    Detections whose score is below ``score_threshold`` are skipped. Each
    remaining box is outlined and captioned with ``text_queries[label]`` and
    its score, using a colour chosen by ``label``.

    Args:
        image: Image passed straight to ``plt.imshow``.
        text_queries: Sequence indexed by label, giving the caption text.
        scores: Per-detection scores.
        boxes: Per-detection ``(x1, y1, x2, y2)`` boxes in the image's data
            coordinates.
        labels: Per-detection integer labels indexing ``text_queries``.
        filename: Name of the output file inside ``output_dir``.
        score_threshold: Minimum score for a detection to be drawn.
        output_dir: Directory the figure is written to.

    Returns:
        None. The figure is written to ``os.path.join(output_dir, filename)``.
    """
    colors = ['red', 'green', 'blue', 'orange', 'purple', 'pink', 'cyan', 'magenta', 'lightblue', 'darkorange', 'darkgreen', 'darkred', 'lavender', 'brown', 'gray', 'black']
    # Show the original image on a cleared current figure.
    plt.clf()
    plt.imshow(image)
    plt.axis('off')
    # Draw the bounding boxes.
    for score, box, label in zip(scores, boxes, labels):
        if score < score_threshold:
            continue
        # Wrap around the palette so labels beyond its length reuse colours
        # instead of raising IndexError.
        color = colors[label % len(colors)]
        x1, y1, x2, y2 = box
        plt.plot(
            [x1, x2, x2, x1, x1], [y1, y1, y2, y2, y1],
            color=color, linewidth=0.6, alpha=0.6
        )
        plt.text(
            x1, y2 + 0.015,
            f'{text_queries[label]}: {score:1.2f}',
            ha='left', va='top', color=color, fontsize=6,
            bbox={'facecolor': 'white', 'edgecolor': color, 'boxstyle': 'square,pad=.3', 'alpha': 0.5}
        )
    # Save the figure under the requested output directory.
    output_path = os.path.join(output_dir, filename)
    plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1, dpi=300)
    print(f"Image with boxes saved to {output_path}")
def image_based_plot_boxes_on_image(image, text_queries, scores, boxes, filename,output_dir):
    """Draw one captioned box per (score, box, text_query) triple and save it.

    The i-th detection is drawn in the i-th palette colour; the palette is
    reused cyclically so that detections beyond its length are still drawn
    rather than silently dropped.

    Args:
        image: Image passed straight to ``plt.imshow``.
        text_queries: Per-detection caption text, parallel to ``boxes``.
        scores: Per-detection scores, parallel to ``boxes``.
        boxes: Per-detection ``(x1, y1, x2, y2)`` boxes in the image's data
            coordinates.
        filename: Name of the output file inside ``output_dir``.
        output_dir: Directory the figure is written to.

    Returns:
        None. The figure is written to ``os.path.join(output_dir, filename)``.
    """
    colors = ['red', 'green', 'blue', 'orange', 'cyan', 'magenta', 'lightblue', 'darkorange', 'lavender']
    plt.clf()
    plt.imshow(image)
    plt.axis('off')
    for position, (score, box, text_query) in enumerate(zip(scores, boxes, text_queries)):
        # Cycle through the palette instead of zipping with it, which would
        # truncate the detections to the number of colours.
        color = colors[position % len(colors)]
        x1, y1, x2, y2 = box
        plt.plot(
            [x1, x2, x2, x1, x1],
            [y1, y1, y2, y2, y1],
            color=color, linewidth=1
        )
        plt.text(
            x1, y2 + 0.015,
            f'{text_query}: {score:1.2f}',
            ha='left', va='top', color=color, fontsize=6,
            bbox={'facecolor': 'white', 'edgecolor': color, 'boxstyle': 'square,pad=.3','alpha': 0.5}
        )
    output_path = os.path.join(output_dir, filename)
    plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1, dpi=300)
    print(f"Image with boxes saved to {output_path}")
def get_iou(bbox1, bbox2):
    """Return the intersection-over-union of two corner-format boxes.

    Args:
        bbox1: ``(x_min, y_min, x_max, y_max)`` of the first box.
        bbox2: ``(x_min, y_min, x_max, y_max)`` of the second box.

    Returns:
        Intersection area divided by union area, or ``0`` when the union
        area is not positive.
    """
    ax_min, ay_min, ax_max, ay_max = bbox1
    bx_min, by_min, bx_max, by_max = bbox2

    # Overlap extent along each axis, clamped so disjoint boxes give zero.
    overlap_w = max(0, min(ax_max, bx_max) - max(ax_min, bx_min))
    overlap_h = max(0, min(ay_max, by_max) - max(ay_min, by_min))
    overlap = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice.
    union = (
        (ax_max - ax_min) * (ay_max - ay_min)
        + (bx_max - bx_min) * (by_max - by_min)
        - overlap
    )

    if union > 0:
        return overlap / union
    return 0
def read_images(image_dir):
    """Load every entry of a directory as a float32 image divided by 255.

    Entries are processed in sorted name order; each one is read with
    ``skimage_io.imread``.

    Args:
        image_dir: Directory whose entries are all read as images.

    Returns:
        Tuple ``(images, filenames)``: the list of float32 arrays and the
        sorted list of entry names, in matching order.
    """
    filenames = sorted(os.listdir(image_dir))
    images = [
        # Cast to float32 and divide by 255 (presumably 8-bit input,
        # giving values in [0, 1] - confirm against the data).
        skimage_io.imread(os.path.join(image_dir, name)).astype(np.float32) / 255.0
        for name in filenames
    ]
    return images, filenames
def preprocess_images(images, model_input_size):
    """Pad each image to a square and resize it to the model input size.

    Padding is added at the bottom and right with the constant value 0.5,
    then the square image is resized with anti-aliasing.

    Args:
        images: Iterable of arrays with a three-entry shape
            ``(height, width, depth)``.
        model_input_size: Side length of the resized square output.

    Returns:
        float32 array stacking the processed images, shape ``(b, h, w, d)``.
    """
    target_shape = (model_input_size, model_input_size)

    def _square_and_resize(img):
        rows, cols, _ = img.shape
        side = max(rows, cols)
        # Pad only bottom/right so the original pixels stay anchored top-left.
        squared = np.pad(
            img,
            ((0, side - rows), (0, side - cols), (0, 0)),
            constant_values=0.5,
        )
        return skimage.transform.resize(squared, target_shape, anti_aliasing=True)

    return np.array([_square_and_resize(img) for img in images], dtype=np.float32)
def prepare_images(image_dir, model_input_size):
    """Read, square-pad and resize every entry of a directory in one pass.

    Entries are processed in sorted name order. Each image is cast to float32
    and divided by 255, padded at the bottom/right with 0.5 to a square, and
    resized with anti-aliasing to ``model_input_size`` on each side.

    Args:
        image_dir: Directory whose entries are all read as images.
        model_input_size: Side length of the resized square output.

    Returns:
        Tuple ``(batch, filenames)``: a float32 array of shape
        ``(b, h, w, d)`` and the sorted list of entry names.
    """
    filenames = sorted(os.listdir(image_dir))
    target_shape = (model_input_size, model_input_size)

    batch = []
    for name in filenames:
        raw = skimage_io.imread(os.path.join(image_dir, name))
        pixels = raw.astype(np.float32) / 255.0

        # Pad image to square (bottom/right only).
        rows, cols, _ = pixels.shape
        side = max(rows, cols)
        pad_spec = ((0, side - rows), (0, side - cols), (0, 0))
        squared = np.pad(pixels, pad_spec, constant_values=0.5)

        # Resize image to fit the model's input size.
        batch.append(
            skimage.transform.resize(squared, target_shape, anti_aliasing=True)
        )

    return np.array(batch, dtype=np.float32), filenames
def plot_bbox_on_image(image, boxes, objectnesses, threshold, output_file):
    """Plot boxes whose objectness reaches a threshold and save the figure.

    The image is drawn with extent ``(0, 1, 1, 0)``, so boxes are expected in
    that 0..1 coordinate frame, in ``(cx, cy, w, h)`` format. Each drawn box
    is captioned with its position in ``boxes`` and its objectness.

    Args:
        image: Image passed to ``ax.imshow``.
        boxes: Iterable of ``(cx, cy, w, h)`` boxes.
        objectnesses: Per-box objectness scores, parallel to ``boxes``.
        threshold: Boxes with objectness below this value are skipped.
        output_file: Path the figure is saved to.

    Returns:
        Index of the last box that was drawn, or ``None`` when no box reached
        the threshold.
    """
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    ax.imshow(image, extent=(0, 1, 1, 0))
    ax.set_axis_off()

    # Stays None when nothing passes the threshold, so the final return
    # cannot hit an unbound local.
    index = None
    for i, (box, objectness) in enumerate(zip(boxes, objectnesses)):
        if objectness < threshold:
            continue
        index = i
        cx, cy, w, h = box
        ax.plot(
            [cx - w / 2, cx + w / 2, cx + w / 2, cx - w / 2, cx - w / 2],
            [cy - h / 2, cy - h / 2, cy + h / 2, cy + h / 2, cy - h / 2],
            color='lime',
        )
        ax.text(
            cx - w / 2 + 0.015,
            cy + h / 2 - 0.015,
            f'Index {i}: {objectness:1.2f}',
            ha='left',
            va='bottom',
            color='black',
            bbox={
                'facecolor': 'white',
                'edgecolor': 'lime',
                'boxstyle': 'square,pad=.3',
            },
        )
    ax.set_xlim(0, 1)
    ax.set_ylim(1, 0)
    ax.set_title('Top objects by objectness')
    # Save the figure to the requested path.
    fig.savefig(output_file, bbox_inches='tight', dpi=300)
    # Close this specific figure to release its memory.
    plt.close(fig)
    print(f"结果图片已保存到: {output_file}")
    return index
def top_object_index(objectnesses, threshold):
    """Return the position of the first objectness not below the threshold.

    Args:
        objectnesses: Iterable of objectness scores.
        threshold: Scores strictly below this value are skipped.

    Returns:
        Index of the first score for which ``score < threshold`` is false,
        or ``None`` if every score is below the threshold.
    """
    passing = (
        position
        for position, value in enumerate(objectnesses)
        if not value < threshold
    )
    return next(passing, None)
def boxes_filter(pred_bboxes, raw_bboxes, pred_scores, instances,
                 score_threshold=0.97, iou_threshold=0.9):
    """Filter parallel detection lists by score, overlap and instance.

    The four input sequences are parallel (same length, same order) and are
    filtered together in three steps:

    1. Keep detections with ``score >= score_threshold``.
    2. For every pair of remaining boxes whose IoU (via ``get_iou`` on
       ``pred_bboxes``) exceeds ``iou_threshold``, drop the lower-scoring
       one; on equal scores the earlier box is kept.
    3. For each distinct value in ``instances`` keep only the highest-scoring
       detection; on equal scores the earlier one is kept.

    Args:
        pred_bboxes: Boxes in ``(x_min, y_min, x_max, y_max)`` format, used
            for the IoU test.
        raw_bboxes: Parallel sequence carried through the filtering unchanged.
        pred_scores: Per-detection scores.
        instances: Per-detection hashable instance identifiers.
        score_threshold: Minimum score kept in step 1 (default 0.97).
        iou_threshold: IoU above which two boxes count as duplicates in
            step 2 (default 0.9).

    Returns:
        Tuple ``(pred_bboxes, raw_bboxes, pred_scores, instances)`` of lists
        holding the surviving detections in their original relative order.
    """
    # Step 1: filter by score.
    filtered_indices = [
        i for i, score in enumerate(pred_scores) if score >= score_threshold
    ]
    pred_bboxes = [pred_bboxes[i] for i in filtered_indices]
    raw_bboxes = [raw_bboxes[i] for i in filtered_indices]
    pred_scores = [pred_scores[i] for i in filtered_indices]
    instances = [instances[i] for i in filtered_indices]

    # Step 2: filter by IoU.
    keep_indices = set(range(len(pred_bboxes)))
    for i in range(len(pred_bboxes)):
        if i not in keep_indices:
            continue
        for j in range(i + 1, len(pred_bboxes)):
            if j not in keep_indices:
                continue
            if get_iou(pred_bboxes[i], pred_bboxes[j]) > iou_threshold:
                if pred_scores[i] >= pred_scores[j]:
                    keep_indices.discard(j)
                else:
                    keep_indices.discard(i)
                    # Box i is gone; it must not suppress any further boxes.
                    break
    surviving = sorted(keep_indices)
    pred_bboxes = [pred_bboxes[i] for i in surviving]
    raw_bboxes = [raw_bboxes[i] for i in surviving]
    pred_scores = [pred_scores[i] for i in surviving]
    instances = [instances[i] for i in surviving]

    # Step 3: keep only the best-scoring detection per instance.
    instance_map = {}
    for i, instance in enumerate(instances):
        if instance not in instance_map or pred_scores[i] > pred_scores[instance_map[instance]]:
            instance_map[instance] = i
    unique_indices = sorted(instance_map.values())
    pred_bboxes = [pred_bboxes[i] for i in unique_indices]
    raw_bboxes = [raw_bboxes[i] for i in unique_indices]
    pred_scores = [pred_scores[i] for i in unique_indices]
    instances = [instances[i] for i in unique_indices]
    return pred_bboxes, raw_bboxes, pred_scores, instances
def format_string(input_string: str) -> str:
    """Lower-case a string and replace each whitespace run with an underscore.

    Args:
        input_string: Text to normalize.

    Returns:
        The lower-cased text with every run of one or more whitespace
        characters collapsed into a single ``_``.
    """
    # \s+ matches one or more whitespace characters.
    return re.sub(r"\s+", "_", input_string.lower())