AutoWeightLoggergeetha

Sleeping

App Files Files Community

Sanjayraju30 committed on Jun 23, 2025

Commit

301eb4d

verified ·

1 Parent(s): dc1f7da

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +40 -262

ocr_engine.py CHANGED Viewed

@@ -3,286 +3,64 @@ import numpy as np
 import cv2
 import re
 import logging
-from datetime import datetime
-import os
 from PIL import Image
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Directory for debug images
-DEBUG_DIR = "debug_images"
-os.makedirs(DEBUG_DIR, exist_ok=True)
-def save_debug_image(img, filename_suffix, prefix=""):
-    """Save image to debug directory with timestamp."""
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
-    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
-    if isinstance(img, Image.Image):
-        img.save(filename)
-    elif len(img.shape) == 3:
-        cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-    else:
-        cv2.imwrite(filename, img)
-    logging.info(f"Saved debug image: {filename}")
-def estimate_brightness(img):
-    """Estimate image brightness."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    return np.mean(gray)
-def preprocess_image(img):
-    """Preprocess image with enhanced contrast and adaptive thresholding."""
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    brightness = estimate_brightness(img)
-    # Apply CLAHE with dynamic clip limit
-    clahe_clip = 10.0 if brightness < 80 else 5.0
-    clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
-    enhanced = clahe.apply(gray)
-    save_debug_image(enhanced, "01_preprocess_clahe")
-    # Stronger blur to reduce noise
-    blurred = cv2.GaussianBlur(enhanced, (7, 7), 1.0)
-    save_debug_image(blurred, "02_preprocess_blur")
-    # Adaptive thresholding with larger block size
-    block_size = max(11, min(41, int(img.shape[0] / 15) * 2 + 1))
     thresh = cv2.adaptiveThreshold(
-        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-        cv2.THRESH_BINARY_INV, block_size, 5
     )
-    # Morphological operations for better digit separation
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
-    save_debug_image(thresh, "03_preprocess_morph")
-    return thresh, enhanced
-def correct_rotation(img):
-    """Correct image rotation using edge detection."""
-    try:
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
-        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=20, minLineLength=10, maxLineGap=5)
-        if lines is not None:
-            angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
-            angle = np.median(angles)
-            if abs(angle) > 0.5:
-                h, w = img.shape[:2]
-                center = (w // 2, h // 2)
-                M = cv2.getRotationMatrix2D(center, angle, 1.0)
-                img = cv2.warpAffine(img, M, (w, h))
-                save_debug_image(img, "00_rotated_image")
-                logging.info(f"Applied rotation: {angle:.2f} degrees")
-        return img
-    except Exception as e:
-        logging.error(f"Rotation correction failed: {str(e)}")
-        return img
-def detect_roi(img):
-    """Detect region of interest with relaxed contour analysis."""
-    try:
-        save_debug_image(img, "04_original")
-        thresh, enhanced = preprocess_image(img)
-        brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        block_sizes = [max(11, min(41, int(img.shape[0] / s) * 2 + 1)) for s in [5, 10, 15]]
-        valid_contours = []
-        img_area = img.shape[0] * img.shape[1]
-        for block_size in block_sizes:
-            temp_thresh = cv2.adaptiveThreshold(
-                enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                cv2.THRESH_BINARY_INV, block_size, 5
-            )
-            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-            temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
-            save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
-            contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            for c in contours:
-                area = cv2.contourArea(c)
-                x, y, w, h = cv2.boundingRect(c)
-                roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
-                aspect_ratio = w / h
-                if (30 < area < (img_area * 0.98) and
-                    0.02 <= aspect_ratio <= 25.0 and w > 15 and h > 5 and roi_brightness > 10):
-                    valid_contours.append((c, area * roi_brightness))
-                    logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
-        if valid_contours:
-            contour, _ = max(valid_contours, key=lambda x: x[1])
-            x, y, w, h = cv2.boundingRect(contour)
-            padding = max(5, min(25, int(min(w, h) * 0.5)))
-            x, y = max(0, x - padding), max(0, y - padding)
-            w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
-            roi_img = img[y:y+h, x:x+w]
-            save_debug_image(roi_img, "06_detected_roi")
-            logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
-            return roi_img, (x, y, w, h)
-        logging.info("No ROI found, using full image.")
-        save_debug_image(img, "06_no_roi_fallback")
-        return img, None
-    except Exception as e:
-        logging.error(f"ROI detection failed: {str(e)}")
-        save_debug_image(img, "06_roi_error_fallback")
-        return img, None
-def detect_digit_template(digit_img, brightness):
-    """Digit recognition with adjusted template matching."""
-    try:
-        h, w = digit_img.shape
-        if h < 5 or w < 2:
-            logging.debug("Digit image too small for template matching.")
-            return None
-        digit_templates = {
-            '0': [np.array([[1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 0, 0, 0, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-            '1': [np.array([[0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0]], dtype=np.float32)],
-            '2': [np.array([[1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1], [1, 1, 0, 0, 0], [1, 1, 1, 1, 1]], dtype=np.float32)],
-            '3': [np.array([[1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-            '4': [np.array([[1, 1, 0, 0, 1], [1, 1, 0, 0, 1], [1, 1, 1, 1, 1], [0, 0, 0, 0, 1], [0, 0, 0, 0, 1]], dtype=np.float32)],
-            '5': [np.array([[1, 1, 1, 1, 1], [1, 1, 0, 0, 0], [1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-            '6': [np.array([[1, 1, 1, 1, 1], [1, 1, 0, 0, 0], [1, 1, 1, 1, 1], [1, 0, 0, 1, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-            '7': [np.array([[1, 1, 1, 1, 1], [0, 0, 0, 0, 1], [0, 0, 0, 0, 1], [0, 0, 0, 0, 1], [0, 0, 0, 0, 1]], dtype=np.float32)],
-            '8': [np.array([[1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-            '9': [np.array([[1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-            '.': [np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)]
-        }
-        sizes = [(5, 5), (4, 4), (3, 3)] if h > w else [(3, 3), (2, 2)]
-        best_match, best_score = None, -1
-        for size in sizes:
-            digit_img_resized = cv2.resize(digit_img, size, interpolation=cv2.INTER_AREA)
-            digit_img_resized = (digit_img_resized > 90).astype(np.float32)  # Adjusted binarization threshold
-            for digit, templates in digit_templates.items():
-                for template in templates:
-                    if template.shape[0] != size[0] or template.shape[1] != size[1]:
-                        continue
-                    result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
-                    _, max_val, _, _ = cv2.minMaxLoc(result)
-                    if max_val > 0.50 and max_val > best_score:  # Lowered threshold
-                        best_score = max_val
-                        best_match = digit
-        logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
-        return best_match if best_score > 0.50 else None
-    except Exception as e:
-        logging.error(f"Template digit detection failed: {str(e)}")
-        return None
-def perform_ocr(img, roi_bbox):
-    """Perform OCR with enhanced Tesseract and template fallback."""
-    try:
-        thresh, enhanced = preprocess_image(img)
-        brightness = estimate_brightness(img)
-        pil_img = Image.fromarray(enhanced)
-        save_debug_image(pil_img, "07_ocr_input")
-        # Enhanced Tesseract configurations
-        configs = [
-            r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
-            r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',  # Block of text
-            r'--oem 3 --psm 10 -c tessedit_char_whitelist=0123456789.'  # Single character
-        ]
-        for config in configs:
-            text = pytesseract.image_to_string(pil_img, config=config)
-            logging.info(f"Tesseract raw output (config {config}): {text}")
-            text = re.sub(r"[^\d\.]", "", text)
-            if text.count('.') > 1:
-                text = text.replace('.', '', text.count('.') - 1)
-            text = text.strip('.')
-            if text and re.fullmatch(r"^\d*\.?\d*$", text):
-                text = text.lstrip('0') or '0'
-                confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
-                logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
-                return text, confidence
-        # Enhanced template-based detection
-        logging.info("Tesseract failed, using template-based detection.")
-        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        digits_info = []
-        for c in contours:
-            x, y, w, h = cv2.boundingRect(c)
-            if w > 3 and h > 4 and 0.02 <= w/h <= 5.0:
-                digits_info.append((x, x+w, y, y+h))
-        if digits_info:
-            digits_info.sort(key=lambda x: x[0])
-            recognized_text = ""
-            prev_x_max = -float('inf')
-            for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
-                x_min, y_min = max(0, x_min), max(0, y_min)
-                x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
-                if x_max <= x_min or y_max <= y_min:
-                    continue
-                digit_crop = thresh[y_min:y_max, x_min:x_max]
-                save_debug_image(digit_crop, f"08_digit_crop_{idx}")
-                digit = detect_digit_template(digit_crop, brightness)
-                if digit:
-                    recognized_text += digit
-                elif x_min - prev_x_max < 15 and prev_x_max != -float('inf'):
-                    recognized_text += '.'
-                prev_x_max = x_max
-            text = re.sub(r"[^\d\.]", "", recognized_text)
-            if text.count('.') > 1:
-                text = text.replace('.', '', text.count('.') - 1)
-            text = text.strip('.')
-            if text and re.fullmatch(r"^\d*\.?\d*$", text):
-                text = text.lstrip('0') or '0'
-                confidence = 90.0 if len(text.replace('.', '')) >= 3 else 85.0
-                logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
-                return text, confidence
-        logging.info("No valid digits detected.")
-        return None, 0.0
-    except Exception as e:
-        logging.error(f"OCR failed: {str(e)}")
-        return None, 0.0
 def extract_weight_from_image(pil_img):
-    """Extract weight from any digital scale image with adjusted thresholds."""
     try:
         img = np.array(pil_img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        save_debug_image(img, "00_input_image")
-        img = correct_rotation(img)
-        brightness = estimate_brightness(img)
-        conf_threshold = 0.60 if brightness > 70 else 0.40  # Lowered threshold
-        # Try ROI-based detection
-        roi_img, roi_bbox = detect_roi(img)
-        if roi_bbox:
-            conf_threshold *= 1.2 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.03) else 1.0
-        result, confidence = perform_ocr(roi_img, roi_bbox)
-        if result and confidence >= conf_threshold * 100:
-            try:
-                weight = float(result)
-                if 0.001 <= weight <= 5000:
-                    logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
-                    return result, confidence
-                logging.warning(f"Weight {result} out of range.")
-            except ValueError:
-                logging.warning(f"Invalid weight format: {result}")
-        # Full image fallback with relaxed threshold
-        logging.info("Primary OCR failed, using full image fallback.")
-        result, confidence = perform_ocr(img, None)
-        if result and confidence >= conf_threshold * 0.80 * 100:
-            try:
-                weight = float(result)
-                if 0.001 <= weight <= 5000:
-                    logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
-                    return result, confidence
-                logging.warning(f"Full image weight {result} out of range.")
-            except ValueError:
-                logging.warning(f"Invalid full image weight format: {result}")
-        logging.info("No valid weight detected.")
         return "Not detected", 0.0
     except Exception as e:
-        logging.error(f"Weight extraction failed: {str(e)}")
-        return "Not detected", 0.0

 import cv2
 import re
 import logging
 from PIL import Image
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+def preprocess_for_ocr(img):
+    """Apply grayscale, blur, and threshold to prepare image for OCR."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
+    # Adaptive threshold
     thresh = cv2.adaptiveThreshold(
+        blurred, 255,
+        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+        cv2.THRESH_BINARY,
+        11, 2
     )
+    # Invert to make text white on black
+    inverted = cv2.bitwise_not(thresh)
+    return inverted
 def extract_weight_from_image(pil_img):
+    """Extract weight reading from an image using pytesseract."""
     try:
+        # Convert PIL to OpenCV
         img = np.array(pil_img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+        # Preprocess
+        processed_img = preprocess_for_ocr(img)
+        # Tesseract config
+        config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
+        # Run OCR
+        text = pytesseract.image_to_string(processed_img, config=config)
+        # Clean text
+        text = text.strip().replace('\n', '').replace(' ', '')
+        text = re.sub(r"[^\d.]", "", text)
+        # Handle multiple dots
+        if text.count('.') > 1:
+            text = text.replace('.', '', text.count('.') - 1)
+        if text.startswith('.'):
+            text = '0' + text
+        # Validate
+        if text and re.fullmatch(r"\d*\.?\d*", text):
+            value = float(text)
+            if 0.001 <= value <= 5000:
+                return text, 90.0  # Return with fixed confidence
+            else:
+                logging.warning(f"Detected weight out of range: {value}")
         return "Not detected", 0.0
     except Exception as e:
+        logging.error(f"OCR error: {str(e)}")
+        return "Not detected", 0.0