kuai / diffusion-dpo-ocr /verify_roadtext_annotations.py
Larer's picture
Add files using upload-large-folder tool
5c19a88
"""
验证 RoadText1K 预处理结果的脚本
可视化 GT 标注框与 crop 后图像的对应关系,检查坐标是否正确
"""
import os
import json
import random
from pathlib import Path
from typing import Dict, List
import cv2
import numpy as np
from PIL import Image
# ============================================================================
# Configuration
# ============================================================================
CONFIG = dict(
    # Directory holding the ground-truth (GT) images
    gt_dir='/home/wanghongbo06/baipurui/DATA/RoadText1k_patch_crop/gt',
    # Path of the annotation JSON file
    annotation_file='/home/wanghongbo06/baipurui/DATA/RoadText1k_patch_crop/annotations.json',
    # Directory the visualisation images are written to
    vis_output_dir='./verify_roadtext_vis',
    # Number of images to visualise
    num_samples=20,
    # Seed for the random image selection
    seed=42,
)
# ============================================================================
def draw_annotations(img: np.ndarray, ann: Dict, color=(0, 255, 0), thickness=2) -> np.ndarray:
    """Draw every annotated polygon and its text label on a copy of ``img``.

    Args:
        img: Image to annotate. It is copied first, so the caller's array is
            left untouched.
        ann: Annotation record. ``'polygons'`` holds coordinates that are
            reshaped to ``(N, 2)`` points; ``'texts'`` and ``'ignore'`` are
            optional per-polygon lists.
        color: Colour handed to OpenCV for boxes that are not ignored.
        thickness: Outline thickness handed to ``cv2.polylines``.

    Returns:
        The annotated copy of ``img``.
    """
    img_vis = img.copy()
    polygons = ann.get('polygons') or []

    # zip() stops at the shortest input, so a missing, null or too-short
    # 'texts' / 'ignore' list would silently hide polygons. Pad both lists up
    # to the number of polygons (empty label, not ignored) before pairing.
    texts = list(ann.get('texts') or [])
    texts.extend([''] * (len(polygons) - len(texts)))
    ignores = list(ann.get('ignore') or [])
    ignores.extend([False] * (len(polygons) - len(ignores)))

    for poly, text, ignore in zip(polygons, texts, ignores):
        # Grey marks ignored boxes; everything else uses the caller's colour.
        box_color = (128, 128, 128) if ignore else color

        # Draw the closed polygon outline.
        pts = np.array(poly).reshape(-1, 2).astype(np.int32)
        cv2.polylines(img_vis, [pts], True, box_color, thickness)

        # Anchor the label 5 px above the first vertex, or 15 px below it when
        # the vertex sits too close to the top edge.
        x, y = int(pts[0][0]), int(pts[0][1]) - 5
        if y < 15:
            y = int(pts[0][1]) + 15

        # A null label becomes empty; long labels are shortened for display.
        label = '' if text is None else str(text)
        display_text = label[:15] + "..." if len(label) > 15 else label
        if display_text:
            cv2.putText(img_vis, display_text, (x, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, box_color, 1)
    return img_vis
def verify_single_image(
    img_path: Path,
    ann: Dict,
    save_path: Path,
) -> Dict:
    """Check one image's polygons against its real size and save an overlay.

    Args:
        img_path: Image file loaded with ``cv2.imread``.
        ann: Annotation record; reads ``'polygons'`` (flat
            ``x0, y0, x1, y1, ...`` coordinate lists) and ``'ignore'``.
        save_path: Destination of the visualisation image.

    Returns:
        On success, a dict with ``'img_size'`` (width, height),
        ``'num_boxes'``, ``'num_ignored'``, ``'boxes_in_bounds'`` and
        ``'boxes_out_of_bounds'``. If the image cannot be read or the
        visualisation cannot be written, a dict holding only ``'error'``.
    """
    img = cv2.imread(str(img_path))
    if img is None:
        return {'error': f'Cannot read image: {img_path}'}
    h, w = img.shape[:2]

    polygons = ann.get('polygons') or []

    # Per-image statistics; a null 'ignore' entry counts as "nothing ignored".
    stats = {
        'img_size': (w, h),
        'num_boxes': len(polygons),
        'num_ignored': sum(ann.get('ignore') or []),
        'boxes_in_bounds': 0,
        'boxes_out_of_bounds': 0,
    }

    # Check that each polygon lies inside the image that was actually loaded.
    for poly in polygons:
        xs = poly[0::2]  # even positions are x coordinates
        ys = poly[1::2]  # odd positions are y coordinates
        min_x, max_x = min(xs), max(xs)
        min_y, max_y = min(ys), max(ys)
        if min_x >= 0 and max_x <= w and min_y >= 0 and max_y <= h:
            stats['boxes_in_bounds'] += 1
        else:
            stats['boxes_out_of_bounds'] += 1
            print(f" [WARNING] Box out of bounds: x=[{min_x:.1f}, {max_x:.1f}], y=[{min_y:.1f}, {max_y:.1f}], img={w}x{h}")

    # Render the overlay.
    img_vis = draw_annotations(img, ann)

    # cv2.imwrite signals a failed write by returning False, so make sure the
    # target directory exists and surface a failure instead of reporting
    # success for an image that was never saved.
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)
    if not cv2.imwrite(str(save_path), img_vis):
        return {'error': f'Cannot write image: {save_path}'}
    return stats
def _polygon_extent(poly):
    """Return ``(min_x, max_x, min_y, max_y)`` of a flat ``x0, y0, x1, y1, ...`` list."""
    xs = poly[0::2]
    ys = poly[1::2]
    return min(xs), max(xs), min(ys), max(ys)


def main():
    """Run the annotation check end to end.

    Loads the annotation JSON named in ``CONFIG``, prints dataset-wide box
    counts, writes overlays for a random sample of images into the output
    directory and finally prints the first three annotation records.

    The dataset-wide bounds check does not open the images; it compares
    against a square crop size taken from the optional ``CONFIG['crop_size']``
    entry (default 512).
    """
    random.seed(CONFIG['seed'])
    gt_dir = Path(CONFIG['gt_dir'])
    ann_file = Path(CONFIG['annotation_file'])
    vis_dir = Path(CONFIG['vis_output_dir'])
    vis_dir.mkdir(parents=True, exist_ok=True)

    print("=" * 60)
    print("RoadText1K 标注验证脚本")
    print("=" * 60)
    print(f"GT dir: {gt_dir}")
    print(f"Annotations: {ann_file}")
    print(f"Output: {vis_dir}")
    print()

    # Bail out early when the inputs are missing.
    if not ann_file.exists():
        print(f"Error: Annotation file not found: {ann_file}")
        return
    if not gt_dir.exists():
        print(f"Error: GT directory not found: {gt_dir}")
        return

    # Load the annotations (mapping of image name -> annotation record).
    with open(ann_file, 'r', encoding='utf-8') as f:
        annotations = json.load(f)
    print(f"总共 {len(annotations)} 张图片有标注")

    # Dataset-wide statistics.
    crop_size = CONFIG.get('crop_size', 512)
    total_boxes = 0
    total_ignored = 0
    total_in_bounds = 0
    total_out_of_bounds = 0
    for ann in annotations.values():
        polygons = ann.get('polygons') or []
        total_boxes += len(polygons)
        total_ignored += sum(ann.get('ignore') or [])
        for poly in polygons:
            min_x, max_x, min_y, max_y = _polygon_extent(poly)
            if min_x >= 0 and max_x <= crop_size and min_y >= 0 and max_y <= crop_size:
                total_in_bounds += 1
            else:
                total_out_of_bounds += 1

    print("\n全局统计:")
    print(f" 总文本框: {total_boxes}")
    print(f" 忽略框: {total_ignored}")
    print(f" 有效框 (非忽略): {total_boxes - total_ignored}")
    print(f" 框在图像范围内: {total_in_bounds}")
    print(f" 框超出范围: {total_out_of_bounds}")
    if total_out_of_bounds > 0:
        print(f"\n⚠️ 有 {total_out_of_bounds} 个框超出图像范围!这可能是坐标转换问题。")

    # Visualise a random sample of images.
    img_names = list(annotations.keys())
    num_samples = min(CONFIG['num_samples'], len(img_names))
    selected = random.sample(img_names, num_samples)
    print(f"\n随机选取 {num_samples} 张图片进行可视化...")
    for img_name in selected:
        ann = annotations[img_name]
        img_path = gt_dir / img_name
        if not img_path.exists():
            # Fall back to the same stem with a .jpg extension.
            img_path = gt_dir / (Path(img_name).stem + '.jpg')
        if not img_path.exists():
            print(f" [SKIP] Image not found: {img_name}")
            continue
        save_path = vis_dir / f"verify_{img_name}"
        stats = verify_single_image(img_path, ann, save_path)
        if 'error' in stats:
            # Report failures instead of dropping them silently.
            print(f" [ERROR] {img_name}: {stats['error']}")
            continue
        print(f" [OK] {img_name}: {stats['num_boxes']} boxes, "
              f"{stats['boxes_out_of_bounds']} out of bounds")
    print(f"\n可视化结果保存到: {vis_dir}")

    # Extra check: print a few raw annotation records.
    print("\n" + "=" * 60)
    print("标注样例 (前 3 张图):")
    print("=" * 60)
    for i, (img_name, ann) in enumerate(list(annotations.items())[:3]):
        polygons = ann.get('polygons') or []
        print(f"\n[{i+1}] {img_name}")
        print(f" crop_position: {ann.get('crop_position', 'N/A')}")
        print(f" num_boxes: {len(polygons)}")

        # Show only the first two boxes. Pad 'texts' / 'ignore' so that a
        # missing or short list does not make zip() drop the boxes.
        shown = polygons[:2]
        texts = list(ann.get('texts') or [])[:2]
        texts.extend([''] * (len(shown) - len(texts)))
        ignores = list(ann.get('ignore') or [])[:2]
        ignores.extend([False] * (len(shown) - len(ignores)))
        for j, (poly, text, ignore) in enumerate(zip(shown, texts, ignores)):
            min_x, max_x, min_y, max_y = _polygon_extent(poly)
            print(f" Box {j}: x=[{min_x:.1f}, {max_x:.1f}], y=[{min_y:.1f}, {max_y:.1f}], "
                  f"text='{str(text)[:20]}', ignore={ignore}")


if __name__ == '__main__':
    main()