AutoWeightLoggergeetha

Sleeping

App Files Community

Sanjayraju30 committed on Jun 20, 2025

Commit

956dff8

verified ·

1 Parent(s): 0e2ed11

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +70 -56

ocr_engine.py CHANGED Viewed

@@ -32,25 +32,32 @@ def estimate_brightness(img):
     return np.mean(gray)
 def preprocess_image(img):
-    """Preprocess image with aggressive contrast and noise handling."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     brightness = estimate_brightness(img)
-    # Maximum CLAHE with adjusted clip for better digit enhancement
-    clahe_clip = 12.0 if brightness < 80 else 8.0
-    clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(4, 4))
     enhanced = clahe.apply(gray)
     save_debug_image(enhanced, "01_preprocess_clahe")
-    # Stronger edge-preserving blur
-    blurred = cv2.bilateralFilter(enhanced, 7, 100, 100)
     save_debug_image(blurred, "02_preprocess_blur")
-    # Adaptive thresholding with smaller blocks
-    block_size = max(3, min(11, int(img.shape[0] / 40) * 2 + 1))
-    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                   cv2.THRESH_BINARY_INV, block_size, 2)
-    # Morphological operations for robust digit segmentation
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
-    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=6)
     save_debug_image(thresh, "03_preprocess_morph")
     return thresh, enhanced
@@ -58,12 +65,12 @@ def correct_rotation(img):
     """Correct image rotation using edge detection."""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        edges = cv2.Canny(gray, 15, 60, apertureSize=3)
-        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=20, minLineLength=10, maxLineGap=3)
         if lines is not None:
             angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
             angle = np.median(angles)
-            if abs(angle) > 0.2:
                 h, w = img.shape[:2]
                 center = (w // 2, h // 2)
                 M = cv2.getRotationMatrix2D(center, angle, 1.0)
@@ -76,20 +83,22 @@ def correct_rotation(img):
         return img
 def detect_roi(img):
-    """Detect region of interest with flexible contour filtering."""
     try:
         save_debug_image(img, "04_original")
         thresh, enhanced = preprocess_image(img)
         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        block_sizes = [max(3, min(11, int(img.shape[0] / s) * 2 + 1)) for s in [4, 8, 12]]
         valid_contours = []
         img_area = img.shape[0] * img.shape[1]
         for block_size in block_sizes:
-            temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                               cv2.THRESH_BINARY_INV, block_size, 2)
             kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-            temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=6)
             save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
             contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
@@ -98,15 +107,15 @@ def detect_roi(img):
                 x, y, w, h = cv2.boundingRect(c)
                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
                 aspect_ratio = w / h
-                if (150 < area < (img_area * 0.8) and
-                    0.15 <= aspect_ratio <= 12.0 and w > 40 and h > 15 and roi_brightness > 30):
                     valid_contours.append((c, area * roi_brightness))
                     logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
         if valid_contours:
             contour, _ = max(valid_contours, key=lambda x: x[1])
             x, y, w, h = cv2.boundingRect(contour)
-            padding = max(10, min(30, int(min(w, h) * 0.25)))
             x, y = max(0, x - padding), max(0, y - padding)
             w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
             roi_img = img[y:y+h, x:x+w]
@@ -123,83 +132,88 @@ def detect_roi(img):
         return img, None
 def detect_digit_template(digit_img, brightness):
-    """Digit recognition using template matching with adjusted patterns."""
     try:
         h, w = digit_img.shape
-        if h < 8 or w < 4:
             logging.debug("Digit image too small for template matching.")
             return None
-        # Adjusted digit templates for seven-segment display
         digit_templates = {
             '0': np.array([[1, 1, 1, 1, 1],
                            [1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1],
-                           [1, 1, 1, 1, 1]]),
             '1': np.array([[0, 0, 1, 0, 0],
                            [0, 0, 1, 0, 0],
                            [0, 0, 1, 0, 0],
                            [0, 0, 1, 0, 0],
-                           [0, 0, 1, 0, 0]]),
             '2': np.array([[1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
                            [1, 1, 1, 1, 1],
                            [1, 1, 0, 0, 0],
-                           [1, 1, 1, 1, 1]]),
             '3': np.array([[1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
-                           [0, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
-                           [1, 1, 1, 1, 1]]),
             '4': np.array([[1, 1, 0, 0, 1],
                            [1, 1, 0, 0, 1],
                            [1, 1, 1, 1, 1],
                            [0, 0, 0, 0, 1],
-                           [0, 0, 0, 0, 1]]),
             '5': np.array([[1, 1, 1, 1, 1],
                            [1, 1, 0, 0, 0],
                            [1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
-                           [1, 1, 1, 1, 1]]),
             '6': np.array([[1, 1, 1, 1, 1],
                            [1, 1, 0, 0, 0],
                            [1, 1, 1, 1, 1],
                            [1, 0, 0, 1, 1],
-                           [1, 1, 1, 1, 1]]),
             '7': np.array([[1, 1, 1, 1, 1],
                            [0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 1],
-                           [0, 0, 0, 0, 1]]),
             '8': np.array([[1, 1, 1, 1, 1],
                            [1, 0, 0, 0, 1],
                            [1, 1, 1, 1, 1],
                            [1, 0, 0, 0, 1],
-                           [1, 1, 1, 1, 1]]),
             '9': np.array([[1, 1, 1, 1, 1],
                            [1, 0, 0, 0, 1],
                            [1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
-                           [1, 1, 1, 1, 1]]),
             '.': np.array([[0, 0, 0],
                            [0, 1, 0],
-                           [0, 0, 0]])
         }
-        # Resize digit_img to match template size (5x5 for digits, 3x3 for decimal)
-        digit_img_resized = cv2.resize(digit_img, (5, 5), interpolation=cv2.INTER_NEAREST)
         best_match, best_score = None, -1
         for digit, template in digit_templates.items():
-            if digit == '.':
-                digit_img_resized = cv2.resize(digit_img, (3, 3), interpolation=cv2.INTER_NEAREST)
             result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
             _, max_val, _, _ = cv2.minMaxLoc(result)
-            if max_val > 0.65 and max_val > best_score:  # Lowered threshold for better match
                 best_score = max_val
                 best_match = digit
         logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
-        return best_match if best_score > 0.65 else None
     except Exception as e:
         logging.error(f"Template digit detection failed: {str(e)}")
         return None
@@ -212,8 +226,8 @@ def perform_ocr(img, roi_bbox):
         pil_img = Image.fromarray(enhanced)
         save_debug_image(pil_img, "07_ocr_input")
-        # Tesseract with flexible numeric config
-        custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
         text = pytesseract.image_to_string(pil_img, config=custom_config)
         logging.info(f"Tesseract raw output: {text}")
@@ -224,7 +238,7 @@ def perform_ocr(img, roi_bbox):
         text = text.strip('.')
         if text and re.fullmatch(r"^\d*\.?\d*$", text):
             text = text.lstrip('0') or '0'
-            confidence = 97.0 if len(text.replace('.', '')) >= 3 else 94.0
             logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
             return text, confidence
@@ -234,7 +248,7 @@ def perform_ocr(img, roi_bbox):
         digits_info = []
         for c in contours:
             x, y, w, h = cv2.boundingRect(c)
-            if w > 6 and h > 8 and 0.1 <= w/h <= 2.5:  # Loosened size and aspect ratio
                 digits_info.append((x, x+w, y, y+h))
         if digits_info:
@@ -251,7 +265,7 @@ def perform_ocr(img, roi_bbox):
                 digit = detect_digit_template(digit_crop, brightness)
                 if digit:
                     recognized_text += digit
-                elif x_min - prev_x_max < 6 and prev_x_max != -float('inf'):  # Adjusted decimal gap
                     recognized_text += '.'
                 prev_x_max = x_max
@@ -261,7 +275,7 @@ def perform_ocr(img, roi_bbox):
             text = text.strip('.')
             if text and re.fullmatch(r"^\d*\.?\d*$", text):
                 text = text.lstrip('0') or '0'
-                confidence = 92.0 if len(text.replace('.', '')) >= 3 else 89.0
                 logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
                 return text, confidence
@@ -279,17 +293,17 @@ def extract_weight_from_image(pil_img):
         save_debug_image(img, "00_input_image")
         img = correct_rotation(img)
         brightness = estimate_brightness(img)
-        conf_threshold = 0.75 if brightness > 100 else 0.55  # Lowered threshold
         roi_img, roi_bbox = detect_roi(img)
         if roi_bbox:
-            conf_threshold *= 1.05 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.15) else 1.0
         result, confidence = perform_ocr(roi_img, roi_bbox)
         if result and confidence >= conf_threshold * 100:
             try:
                 weight = float(result)
-                if 0.01 <= weight <= 1000:
                     logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Weight {result} out of range.")
@@ -298,10 +312,10 @@ def extract_weight_from_image(pil_img):
         logging.info("Primary OCR failed, using full image fallback.")
         result, confidence = perform_ocr(img, None)
-        if result and confidence >= conf_threshold * 0.8 * 100:  # Adjusted fallback threshold
             try:
                 weight = float(result)
-                if 0.01 <= weight <= 1000:
                     logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Full image weight {result} out of range.")

     return np.mean(gray)
 def preprocess_image(img):
+    """Preprocess image with dynamic contrast and noise handling."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     brightness = estimate_brightness(img)
+    # Dynamic CLAHE based on brightness
+    clahe_clip = 10.0 if brightness < 100 else 6.0
+    clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
     enhanced = clahe.apply(gray)
     save_debug_image(enhanced, "01_preprocess_clahe")
+    # Edge-preserving blur with adaptive parameters
+    blur_diameter = 9 if brightness < 100 else 7
+    blurred = cv2.bilateralFilter(enhanced, blur_diameter, 75, 75)
     save_debug_image(blurred, "02_preprocess_blur")
+    # Dynamic adaptive thresholding
+    block_size = max(5, min(21, int(img.shape[0] / 30) * 2 + 1))
+    thresh = cv2.adaptiveThreshold(
+        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY_INV, block_size, 5
+    )
+    # Morphological operations for better digit segmentation
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
+    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
+    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=4)
     save_debug_image(thresh, "03_preprocess_morph")
     return thresh, enhanced
     """Correct image rotation using edge detection."""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
+        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=30, minLineLength=20, maxLineGap=5)
         if lines is not None:
             angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
             angle = np.median(angles)
+            if abs(angle) > 0.5:
                 h, w = img.shape[:2]
                 center = (w // 2, h // 2)
                 M = cv2.getRotationMatrix2D(center, angle, 1.0)
         return img
 def detect_roi(img):
+    """Detect region of interest with multi-scale contour analysis."""
     try:
         save_debug_image(img, "04_original")
         thresh, enhanced = preprocess_image(img)
         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        block_sizes = [max(5, min(21, int(img.shape[0] / s) * 2 + 1)) for s in [5, 10, 15]]
         valid_contours = []
         img_area = img.shape[0] * img.shape[1]
         for block_size in block_sizes:
+            temp_thresh = cv2.adaptiveThreshold(
+                enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                cv2.THRESH_BINARY_INV, block_size, 5
+            )
             kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
+            temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=4)
             save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
             contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                 x, y, w, h = cv2.boundingRect(c)
                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
                 aspect_ratio = w / h
+                if (100 < area < (img_area * 0.9) and
+                    0.1 <= aspect_ratio <= 15.0 and w > 30 and h > 10 and roi_brightness > 20):
                     valid_contours.append((c, area * roi_brightness))
                     logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
         if valid_contours:
             contour, _ = max(valid_contours, key=lambda x: x[1])
             x, y, w, h = cv2.boundingRect(contour)
+            padding = max(8, min(25, int(min(w, h) * 0.3)))
             x, y = max(0, x - padding), max(0, y - padding)
             w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
             roi_img = img[y:y+h, x:x+w]
         return img, None
 def detect_digit_template(digit_img, brightness):
+    """Digit recognition using template matching with refined patterns."""
     try:
         h, w = digit_img.shape
+        if h < 6 or w < 3:
             logging.debug("Digit image too small for template matching.")
             return None
+        # Refined digit templates for seven-segment display
         digit_templates = {
             '0': np.array([[1, 1, 1, 1, 1],
                            [1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1],
                            [1, 0, 0, 0, 1],
+                           [1, 1, 1, 1, 1]], dtype=np.float32),
             '1': np.array([[0, 0, 1, 0, 0],
                            [0, 0, 1, 0, 0],
                            [0, 0, 1, 0, 0],
                            [0, 0, 1, 0, 0],
+                           [0, 0, 1, 0, 0]], dtype=np.float32),
             '2': np.array([[1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
                            [1, 1, 1, 1, 1],
                            [1, 1, 0, 0, 0],
+                           [1, 1, 1, 1, 1]], dtype=np.float32),
             '3': np.array([[1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
+                           [1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
+                           [1, 1, 1, 1, 1]], dtype=np.float32),
             '4': np.array([[1, 1, 0, 0, 1],
                            [1, 1, 0, 0, 1],
                            [1, 1, 1, 1, 1],
                            [0, 0, 0, 0, 1],
+                           [0, 0, 0, 0, 1]], dtype=np.float32),
             '5': np.array([[1, 1, 1, 1, 1],
                            [1, 1, 0, 0, 0],
                            [1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
+                           [1, 1, 1, 1, 1]], dtype=np.float32),
             '6': np.array([[1, 1, 1, 1, 1],
                            [1, 1, 0, 0, 0],
                            [1, 1, 1, 1, 1],
                            [1, 0, 0, 1, 1],
+                           [1, 1, 1, 1, 1]], dtype=np.float32),
             '7': np.array([[1, 1, 1, 1, 1],
                            [0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 1],
+                           [0, 0, 0, 0, 1]], dtype=np.float32),
             '8': np.array([[1, 1, 1, 1, 1],
                            [1, 0, 0, 0, 1],
                            [1, 1, 1, 1, 1],
                            [1, 0, 0, 0, 1],
+                           [1, 1, 1, 1, 1]], dtype=np.float32),
             '9': np.array([[1, 1, 1, 1, 1],
                            [1, 0, 0, 0, 1],
                            [1, 1, 1, 1, 1],
                            [0, 0, 0, 1, 1],
+                           [1, 1, 1, 1, 1]], dtype=np.float32),
             '.': np.array([[0, 0, 0],
                            [0, 1, 0],
+                           [0, 0, 0]], dtype=np.float32)
         }
+        # Resize digit image to match template size
+        target_size = (5, 5) if h > w else (3, 3)  # Adjust for decimal point
+        digit_img_resized = cv2.resize(digit_img, target_size, interpolation=cv2.INTER_AREA)
+        digit_img_resized = (digit_img_resized > 128).astype(np.float32)  # Binarize
         best_match, best_score = None, -1
         for digit, template in digit_templates.items():
+            if digit == '.' and target_size != (3, 3):
+                continue
+            if digit != '.' and target_size == (3, 3):
+                continue
             result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
             _, max_val, _, _ = cv2.minMaxLoc(result)
+            if max_val > 0.6 and max_val > best_score:  # Lowered threshold
                 best_score = max_val
                 best_match = digit
         logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
+        return best_match if best_score > 0.6 else None
     except Exception as e:
         logging.error(f"Template digit detection failed: {str(e)}")
         return None
         pil_img = Image.fromarray(enhanced)
         save_debug_image(pil_img, "07_ocr_input")
+        # Tesseract with optimized numeric config
+        custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
         text = pytesseract.image_to_string(pil_img, config=custom_config)
         logging.info(f"Tesseract raw output: {text}")
         text = text.strip('.')
         if text and re.fullmatch(r"^\d*\.?\d*$", text):
             text = text.lstrip('0') or '0'
+            confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
             logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
             return text, confidence
         digits_info = []
         for c in contours:
             x, y, w, h = cv2.boundingRect(c)
+            if w > 5 and h > 6 and 0.05 <= w/h <= 3.0:
                 digits_info.append((x, x+w, y, y+h))
         if digits_info:
                 digit = detect_digit_template(digit_crop, brightness)
                 if digit:
                     recognized_text += digit
+                elif x_min - prev_x_max < 8 and prev_x_max != -float('inf'):
                     recognized_text += '.'
                 prev_x_max = x_max
             text = text.strip('.')
             if text and re.fullmatch(r"^\d*\.?\d*$", text):
                 text = text.lstrip('0') or '0'
+                confidence = 90.0 if len(text.replace('.', '')) >= 3 else 85.0
                 logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
                 return text, confidence
         save_debug_image(img, "00_input_image")
         img = correct_rotation(img)
         brightness = estimate_brightness(img)
+        conf_threshold = 0.7 if brightness > 80 else 0.5
         roi_img, roi_bbox = detect_roi(img)
         if roi_bbox:
+            conf_threshold *= 1.1 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.1) else 1.0
         result, confidence = perform_ocr(roi_img, roi_bbox)
         if result and confidence >= conf_threshold * 100:
             try:
                 weight = float(result)
+                if 0.001 <= weight <= 2000:
                     logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Weight {result} out of range.")
         logging.info("Primary OCR failed, using full image fallback.")
         result, confidence = perform_ocr(img, None)
+        if result and confidence >= conf_threshold * 0.9 * 100:
             try:
                 weight = float(result)
+                if 0.001 <= weight <= 2000:
                     logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Full image weight {result} out of range.")