Ehtesham123 commited on
Commit
5b19d10
·
verified ·
1 Parent(s): 8fe1e58

Upload 2 files

Browse files
Files changed (2) hide show
  1. STD_detect.py +46 -0
  2. STR_recognize.py +18 -0
STD_detect.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from ultralytics import YOLO
4
+ from PIL import Image
5
+
6
class OBBPredictor:
    """Detect oriented bounding boxes (OBB) with a YOLO model and return a
    perspective-rectified crop for each detected region."""

    def __init__(self, model_path):
        """Load the YOLO OBB model from *model_path*."""
        self.model = YOLO(model_path)

    @staticmethod
    def order_points(pts):
        """Order 4 corner points as [top-left, top-right, bottom-right, bottom-left].

        Uses the coordinate-sum / coordinate-difference heuristic:
        the top-left corner has the smallest x + y and the bottom-right the
        largest; the top-right has the smallest y - x and the bottom-left the
        largest.

        NOTE(review): this heuristic can misorder boxes rotated near 45°,
        where two corners share the same coordinate sum — acceptable for
        mostly-horizontal text regions, but confirm for your data.
        """
        rect = np.zeros((4, 2), dtype=np.float32)
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]    # top-left: smallest x + y
        rect[2] = pts[np.argmax(s)]    # bottom-right: largest x + y
        diff = np.diff(pts, axis=1)    # y - x for each point
        rect[1] = pts[np.argmin(diff)]  # top-right: smallest y - x
        rect[3] = pts[np.argmax(diff)]  # bottom-left: largest y - x
        return rect

    @staticmethod
    def crop_obb_region(image, points):
        """Perspective-warp the quadrilateral *points* out of *image*.

        The output size is taken from the longer of each pair of opposite
        edges of the ordered quad. Returns a PIL.Image.
        """
        ordered_pts = OBBPredictor.order_points(points).astype(np.float32)
        width = int(max(np.linalg.norm(ordered_pts[0] - ordered_pts[1]),
                        np.linalg.norm(ordered_pts[2] - ordered_pts[3])))
        height = int(max(np.linalg.norm(ordered_pts[1] - ordered_pts[2]),
                         np.linalg.norm(ordered_pts[3] - ordered_pts[0])))
        # Guard against degenerate (zero-area) detections: a 0-sized
        # destination would make cv2.warpPerspective fail.
        width = max(width, 1)
        height = max(height, 1)
        dst_pts = np.array([
            [0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]
        ], dtype=np.float32)

        M = cv2.getPerspectiveTransform(ordered_pts, dst_pts)
        warped = cv2.warpPerspective(image, M, (width, height))
        return Image.fromarray(warped)

    def predict(self, image_pil):
        """Run OBB detection on a PIL image.

        Returns a list of PIL.Image crops, one per detected oriented box
        (empty list when nothing is detected).
        """
        image_np = np.array(image_pil)
        results = self.model(image_np)
        crops = []
        for result in results:
            # result.obb.xyxyxyxy holds the 4 corner points of each box.
            if hasattr(result.obb, "xyxyxyxy") and len(result.obb.xyxyxyxy) > 0:
                for box in result.obb.xyxyxyxy:
                    points = box.cpu().numpy()
                    cropped = self.crop_obb_region(image_np, points)
                    crops.append(cropped)
        return crops
STR_recognize.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ from strhub.data.module import SceneTextDataModule
4
+ from strhub.models.utils import load_from_checkpoint
5
+
6
class TextRecognizer:
    """Scene-text recognition wrapper around a strhub checkpoint.

    Loads the model once at construction time and exposes a single
    `recognize` call that maps a PIL image to its decoded text string.
    """

    def __init__(self, ckpt_path, device='cpu'):
        """Load the checkpoint onto *device* in eval mode and build the
        image transform matching the model's expected input size."""
        self.device = device
        self.str = load_from_checkpoint(ckpt_path).eval().to(device)
        self.img_transform = SceneTextDataModule.get_transform(self.str.hparams.img_size)

    def recognize(self, image_pil):
        """Return the recognized text for a single PIL image."""
        # Transform to a tensor and add a batch dimension of 1.
        batch = self.img_transform(image_pil).unsqueeze(0).to(self.device)
        with torch.no_grad():
            probs = self.str(batch).softmax(-1)
        # The tokenizer decodes per-sample; unwrap the single-item batch.
        labels, _ = self.str.tokenizer.decode(probs)
        return labels[0]