# ocr
#
# File size: 2,237 Bytes
#
# 2b45a96

import os
import json
import cv2
import numpy as np

# Input label file, image directory, and the locations where the cropped
# word images and their recognition label file are written.
input_label_file = "D:/MyCode/Python/Model/paddleocr/total_text/test/test.txt"
image_root = "D:/MyCode/Python/Model/paddleocr/total_text/test"
output_label_file = "D:/MyCode/Python/Model/paddleocr/total_text/test/test_rec.txt"
crop_output_dir = os.path.join(image_root, "rec_crop")


def parse_label_line(line):
    """Split one label line into its image path and annotation list.

    Args:
        line: Raw line of the form ``<relative image path><TAB><JSON>``.

    Returns:
        A ``(img_path_rel, anns)`` tuple, or ``None`` when the line is blank.

    Raises:
        ValueError: If the tab separator is missing or the JSON payload is
            invalid (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    stripped = line.strip()
    if not stripped:
        return None
    # Split on the first tab only so a stray tab later in the line cannot
    # break the two-field unpacking.
    img_path_rel, sep, payload = stripped.partition("\t")
    if not sep:
        raise ValueError("missing tab separator")
    return img_path_rel, json.loads(payload)


def clamp_box(x, y, w, h, width, height):
    """Intersect a bounding box with the image rectangle.

    Args:
        x, y: Top-left corner of the box (may be negative).
        w, h: Box width and height in pixels.
        width, height: Image dimensions in pixels.

    Returns:
        ``(x0, y0, x1, y1)`` half-open pixel bounds of the part of the box
        that lies inside the image, or ``None`` when there is no overlap.
    """
    # The far edges are computed from the ORIGINAL origin, so clamping a
    # negative x/y shrinks the box instead of shifting it.
    x0 = max(0, x)
    y0 = max(0, y)
    x1 = min(width, x + w)
    y1 = min(height, y + h)
    if x1 <= x0 or y1 <= y0:
        return None
    return x0, y0, x1, y1


def main():
    """Crop every labelled text region and write the recognition label file.

    Reads ``input_label_file``, crops the axis-aligned bounding rectangle of
    each annotated polygon from its image, saves the crops under
    ``crop_output_dir`` and writes ``<crop path><TAB><text>`` lines to
    ``output_label_file``. Annotations whose transcription is empty or
    ``###`` are skipped.
    """
    os.makedirs(crop_output_dir, exist_ok=True)

    with open(input_label_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    out_lines = []
    crop_id = 0  # Running index across all images; keeps crop names unique.

    for line_no, line in enumerate(lines, start=1):
        try:
            parsed = parse_label_line(line)
        except ValueError as exc:
            print(f"[WARNING] Dòng nhãn không hợp lệ (dòng {line_no}), bỏ qua: {exc}")
            continue
        if parsed is None:
            continue
        img_path_rel, anns = parsed
        img_path = os.path.join(image_root, img_path_rel)

        if not os.path.exists(img_path):
            print(f"[WARNING] Không tìm thấy ảnh: {img_path}")
            continue

        img = cv2.imread(img_path)
        if img is None:
            print(f"[WARNING] Lỗi đọc ảnh: {img_path}")
            continue

        height, width = img.shape[:2]
        img_stem = os.path.splitext(os.path.basename(img_path))[0]

        for ann in anns:
            text = ann['transcription'].strip()
            points = ann['points']

            if not text or text.lower() == "###":
                continue

            # cv2.boundingRect only accepts int32 or float32 points; a plain
            # "int" cast produces int64 on most platforms and is rejected.
            pts = np.array(points, dtype=np.float32).astype(np.int32)
            x, y, w, h = cv2.boundingRect(pts)

            # Restrict the box to the image area.
            bounds = clamp_box(x, y, w, h, width, height)
            if bounds is None:
                print(f"[WARNING] Box vượt quá kích thước ảnh ({img_path}): x={x}, y={y}, w={w}, h={h}")
                continue
            x0, y0, x1, y1 = bounds

            cropped = img[y0:y1, x0:x1]

            crop_img_name = f"{img_stem}_crop_{crop_id}.jpg"
            crop_img_path = os.path.join(crop_output_dir, crop_img_name)
            # imwrite reports failure through its return value; do not emit a
            # label for a crop that was never written.
            if not cv2.imwrite(crop_img_path, cropped):
                print(f"[WARNING] Không ghi được ảnh crop: {crop_img_path}")
                continue

            out_lines.append(f"rec_crop/{crop_img_name}\t{text}")
            crop_id += 1

    with open(output_label_file, "w", encoding="utf-8") as f:
        f.write('\n'.join(out_lines))

    print(f"✅ Đã tạo {len(out_lines)} mẫu recognition tại: {output_label_file}")


if __name__ == "__main__":
    main()