Spaces:
Sleeping
Sleeping
| import pytesseract | |
| import numpy as np | |
| import cv2 | |
| import re | |
| from PIL import Image | |
| import logging | |
| # Set up logging | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
def preprocess_image(img):
    """Binarize an image of a scale display so that OCR can read it.

    Pipeline: grayscale -> CLAHE contrast enhancement -> inverted adaptive
    Gaussian threshold -> 3x3 morphological opening.

    Args:
        img: A PIL image or array-like. Multi-channel input is interpreted
            in RGB channel order; 2-D input is treated as already grayscale.

    Returns:
        The thresholded single-channel array on success. If any step fails,
        the error is logged and the input is returned without thresholding
        (as a NumPy array when that conversion succeeded), so the caller
        can still attempt OCR on it.
    """
    try:
        img = np.array(img)

        # A 2-D array has no colour channels to convert; cvtColor would
        # reject it and the whole preprocessing step would be skipped.
        if img.ndim == 2:
            gray = img
        else:
            gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

        # Dark images (mean intensity below 100) get a stronger contrast boost.
        brightness = np.mean(gray)
        clip_limit = 4.0 if brightness < 100 else 2.0
        clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Neighbourhood size scales with image height, clamped to [11, 31].
        # The "* 2 + 1" keeps it odd, which adaptiveThreshold requires.
        block_size = max(11, min(31, int(gray.shape[0] / 20) * 2 + 1))
        thresh = cv2.adaptiveThreshold(
            enhanced,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,  # pixels darker than the local mean -> 255
            block_size,
            2,
        )

        # Opening removes isolated white specks left by thresholding.
        # NOTE(review): a 3x3 opening can also erase very thin strokes or a
        # small decimal point -- confirm against real scale photos.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        return cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    except Exception as e:
        # Best effort: hand back the unprocessed input rather than raising.
        logging.error("Preprocessing failed: %s", e)
        return img
def _parse_weight_text(raw_text):
    """Turn raw OCR output into ``(weight_text, confidence)``, or ``None``."""
    cleaned = re.sub(r"[^\d\.]", "", raw_text)

    # Several dots are treated as OCR noise: drop all but the last one.
    extra_dots = cleaned.count('.') - 1
    if extra_dots > 0:
        cleaned = cleaned.replace('.', '', extra_dots)
    cleaned = cleaned.strip('.')
    if not cleaned:
        return None
    # From here `cleaned` is digits with at most one interior dot, so
    # float() below cannot fail and no further format check is needed.

    significant = cleaned.lstrip('0')
    # Heuristic score based on digit count; it is not a Tesseract confidence.
    confidence = 95.0 if len(significant.replace('.', '')) >= 3 else 90.0

    # Restore the zero that lstrip removed so "0.5" is not reported as ".5"
    # (and an all-zero reading stays "0").
    if not significant or significant.startswith('.'):
        significant = '0' + significant

    if not 0.001 <= float(significant) <= 5000:
        return None
    return significant, confidence


def extract_weight_from_image(pil_img):
    """Read the weight shown on a digital scale from an image.

    The image is binarized with ``preprocess_image`` and passed to
    Tesseract, first in single-line mode and then in block mode, with the
    character set restricted to digits and the dot. The first reading that
    parses to a number in the range 0.001..5000 is returned.

    Args:
        pil_img: The image to read; handed to ``preprocess_image`` as-is.

    Returns:
        A ``(text, confidence)`` tuple. ``text`` is the normalized number as
        a string and ``confidence`` is a fixed heuristic (95.0 for three or
        more significant digits, otherwise 90.0). When nothing valid is
        read, or any step raises, the result is ``("Not detected", 0.0)``.
    """
    try:
        # preprocess_image does its own colour conversion. Converting to BGR
        # here as well would swap the channels twice and feed it wrong data.
        thresh = preprocess_image(pil_img)

        configs = (
            r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
            r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',  # Block of text
        )
        for config in configs:
            text = pytesseract.image_to_string(thresh, config=config)
            logging.info("Tesseract raw output (config %s): %s", config, text)

            parsed = _parse_weight_text(text)
            if parsed is not None:
                weight_text, confidence = parsed
                logging.info(
                    "Detected weight: %s kg, Confidence: %.2f%%",
                    weight_text,
                    confidence,
                )
                return weight_text, confidence

        logging.info("No valid weight detected.")
        return "Not detected", 0.0
    except Exception as e:
        # Top-level boundary: callers always get the sentinel, never a raise.
        logging.error("Weight extraction failed: %s", e)
        return "Not detected", 0.0