Spaces:

dev2607
/

LABEL_LENS

Sleeping

App Files Files Community

dev2607 committed on Mar 14, 2025

Commit

2d99a85

verified ·

1 Parent(s): 447cd51

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -22

app.py CHANGED Viewed

@@ -146,41 +146,102 @@ def extract_text_from_image(image):
         except (subprocess.SubprocessError, FileNotFoundError):
             return "Tesseract OCR is not installed or not properly configured. Please check installation."
-        # Image preprocessing for better OCR
         import cv2
         import numpy as np
-        # Convert PIL image to OpenCV format
         img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
         # Convert to grayscale
         gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
-        # Apply thresholding to get black and white image
-        _, binary = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
-        # Noise removal
-        kernel = np.ones((1, 1), np.uint8)
-        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
-        # Dilate to connect text
-        binary = cv2.dilate(binary, kernel, iterations=1)
-        # Convert back to PIL image for tesseract
-        binary_pil = Image.fromarray(cv2.bitwise_not(binary))
-        # Run OCR with improved configuration
-        custom_config = r'--oem 3 --psm 6 -l eng'
-        text = pytesseract.image_to_string(binary_pil, config=custom_config)
-        if not text.strip():
-            # Try original image as fallback
-            text = pytesseract.image_to_string(image, config=custom_config)
-        if not text.strip():
             return "No text could be extracted. Ensure image is clear and readable."
-        return text
     except Exception as e:
         return f"Error extracting text: {str(e)}"

         except (subprocess.SubprocessError, FileNotFoundError):
             return "Tesseract OCR is not installed or not properly configured. Please check installation."
+        # Import necessary libraries
         import cv2
         import numpy as np
+        from PIL import Image, ImageOps, ImageEnhance
+        # First approach: Invert the image for light text on dark background
+        inverted_image = ImageOps.invert(image)
+        # Try OCR on inverted image
+        custom_config = r'--oem 3 --psm 6 -l eng --dpi 300'
+        inverted_text = pytesseract.image_to_string(inverted_image, config=custom_config)
+        # Second approach: OpenCV processing for colored backgrounds
         img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
         # Convert to grayscale
         gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
+        # Apply bilateral filter to preserve edges while reducing noise
+        filtered = cv2.bilateralFilter(gray, 11, 17, 17)
+        # Adaptive thresholding to handle varied lighting
+        thresh = cv2.adaptiveThreshold(filtered, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                      cv2.THRESH_BINARY, 11, 2)
+        # Invert the image (if text is light on dark background)
+        inverted_thresh = cv2.bitwise_not(thresh)
+        # Try OCR on processed image
+        cv_text = pytesseract.image_to_string(
+            Image.fromarray(inverted_thresh),
+            config=custom_config
+        )
+        # Third approach: Color filtering to isolate text from colored background
+        # Convert to HSV color space to better isolate colors
+        hsv = cv2.cvtColor(img_cv, cv2.COLOR_BGR2HSV)
+        # Create a mask to extract light colored text (assuming white/light text)
+        lower_white = np.array([0, 0, 150])
+        upper_white = np.array([180, 30, 255])
+        mask = cv2.inRange(hsv, lower_white, upper_white)
+        # Apply morphological operations to clean up the mask
+        kernel = np.ones((2, 2), np.uint8)
+        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
+        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
+        # Improve character connectivity
+        mask = cv2.dilate(mask, kernel, iterations=1)
+        # Try OCR on color filtered image
+        color_text = pytesseract.image_to_string(
+            Image.fromarray(mask),
+            config=r'--oem 3 --psm 6 -l eng --dpi 300'
+        )
+        # Fourth approach: Try directly with the image but with different configs
+        direct_text = pytesseract.image_to_string(
+            image,
+            config=r'--oem 3 --psm 11 -l eng --dpi 300'
+        )
+        # Compare results and select the best one
+        results = [inverted_text, cv_text, color_text, direct_text]
+        # Select the result with the most alphanumeric characters
+        def count_alphanumeric(text):
+            return sum(c.isalnum() for c in text)
+        best_text = max(results, key=count_alphanumeric)
+        # If results are still poor, retry with Tesseract's heavy noise removal enabled
+        if count_alphanumeric(best_text) < 20:
+            # Retry with textord_heavy_nr=1 (aggressive noise removal); this does not invert colors
+            neg_text = pytesseract.image_to_string(
+                image,
+                config=r'--oem 3 --psm 6 -c textord_heavy_nr=1 -c textord_debug_printable=0 -l eng --dpi 300'
+            )
+            if count_alphanumeric(neg_text) > count_alphanumeric(best_text):
+                best_text = neg_text
+        # Clean up the text
+        best_text = re.sub(r'[^\w\s,;:%.()\n\'-]', '', best_text)
+        best_text = best_text.replace('\n\n', '\n')
+        # Special case for ingredients list format
+        if "ingredient" in best_text.lower() or any(x in best_text.lower() for x in ["sugar", "cocoa", "milk", "contain"]):
+            # Specific cleaning for ingredient lists
+            best_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', best_text)  # Add space between lowercase and uppercase
+            best_text = re.sub(r'(\d+)([a-zA-Z])', r'\1 \2', best_text)  # Add space between number and letter
+        if not best_text.strip():
             return "No text could be extracted. Ensure image is clear and readable."
+        return best_text.strip()
     except Exception as e:
         return f"Error extracting text: {str(e)}"