Spaces:

Abhisesh7
/

Invoice-Fraud-Detection

Sleeping

App Files Files Community

Abhisesh7 committed on May 22, 2025

Commit

c104c8e

verified ·

1 Parent(s): f3645fd

Rename image_extraction.py to image_ocr.py

Browse files

Files changed (2) hide show

image_extraction.py +0 -198
image_ocr.py +21 -0

image_extraction.py DELETED Viewed

@@ -1,198 +0,0 @@
-from paddleocr import PaddleOCR
-from PIL import Image, ImageEnhance, ImageFilter
-import io
-import logging
-import time
-import os
-import numpy as np
-# Configure root logging at DEBUG level when this module is imported, and
-# create the module-level logger that the functions in this file write to.
-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger(__name__)
-# Initialize PaddleOCR with retries
-def initialize_paddle_ocr():
-    """
-    Initialize PaddleOCR with retry logic for downloading models.
-    Returns:
-        PaddleOCR instance or None if initialization fails.
-    """
-    max_retries = 3
-    retry_delay = 5  # seconds
-    for attempt in range(1, max_retries + 1):
-        try:
-            logger.info(f"Initializing PaddleOCR (Attempt {attempt}/{max_retries})...")
-            ocr = PaddleOCR(
-                use_angle_cls=True,
-                lang='en',
-                use_gpu=False,
-                show_log=False,  # Suppress PaddleOCR logs to reduce noise
-                det_max_side_len=3500,  # Increase max side length for better detection
-                rec_batch_num=1,  # Process one image at a time for stability
-                det_db_score_mode='slow',  # Use most accurate detection
-                det_db_box_thresh=0.2,  # Lower threshold for better text detection
-                det_db_unclip_ratio=3.5,  # Increase ratio for better text region detection
-                drop_score=0.1,  # Lower drop score to retain more text
-                det_db_thresh=0.1  # Lower threshold for detection
-            )
-            logger.info("PaddleOCR initialized successfully.")
-            return ocr
-        except Exception as e:
-            logger.warning(f"PaddleOCR initialization failed: {str(e)}")
-            if attempt < max_retries:
-                logger.info(f"Retrying in {retry_delay} seconds...")
-                time.sleep(retry_delay)
-            else:
-                logger.error("Failed to initialize PaddleOCR after all retries.")
-                return None
-# Create the shared PaddleOCR instance once, at import time. The value is None
-# when initialize_paddle_ocr() exhausted its retries.
-ocr = initialize_paddle_ocr()
-def preprocess_image(img, attempt=1):
-    """
-    Preprocess the image to maximize OCR accuracy with multiple attempts.
-    Args:
-        img (PIL.Image): Input image.
-        attempt (int): Preprocessing attempt number (1 or 2 for different settings).
-    Returns:
-        PIL.Image: Preprocessed image.
-    """
-    try:
-        # Resize image to a higher resolution for better OCR
-        max_size = (3000, 3000)
-        img.thumbnail(max_size, Image.Resampling.LANCZOS)
-        # Convert to grayscale
-        img = img.convert('L')
-        # Increase contrast
-        enhancer = ImageEnhance.Contrast(img)
-        img = enhancer.enhance(5.0 if attempt == 1 else 3.0)
-        # Sharpen the image
-        img = img.filter(ImageFilter.SHARPEN)
-        # Reduce noise with a stronger filter
-        img = img.filter(ImageFilter.MedianFilter(size=5 if attempt == 1 else 3))
-        # Apply adaptive thresholding
-        img_array = np.array(img)
-        thresh = 120 if attempt == 1 else 150  # Different thresholds for different attempts
-        img_array = np.where(img_array > thresh, 255, 0).astype(np.uint8)
-        img = Image.fromarray(img_array)
-        # Apply dilation to connect broken characters
-        img = img.filter(ImageFilter.MaxFilter(size=3 if attempt == 1 else 5))
-        return img
-    except Exception as e:
-        logger.error(f"Failed to preprocess image (Attempt {attempt}): {str(e)}")
-        return img
-def validate_image(image_file):
-    """
-    Validate the image file before processing.
-    Args:
-        image_file (str): Path to the image file.
-    Returns:
-        bool: True if valid, False otherwise.
-    """
-    try:
-        img = Image.open(image_file)
-        img.verify()  # Verify the image is not corrupted
-        img = Image.open(image_file)  # Reopen after verify, as verify() closes the file
-        # Check image format
-        if img.format not in ['PNG', 'JPEG', 'JPG']:
-            logger.warning(f"Unsupported image format: {img.format}. Supported formats: PNG, JPEG, JPG.")
-            return False
-        # Check image size (avoid very large images that might cause memory issues)
-        max_size = (5000, 5000)  # Max width, height
-        if img.size[0] > max_size[0] or img.size[1] > max_size[1]:
-            logger.warning(f"Image size {img.size} exceeds maximum allowed size {max_size}.")
-            return False
-        return True
-    except Exception as e:
-        logger.error(f"Image validation failed: {str(e)}")
-        return False
-def extract_text_from_image(image_file):
-    """
-    Extract text from an image using PaddleOCR with multiple attempts for accuracy.
-    Args:
-        image_file (str): Path to the image file.
-    Returns:
-        str: Extracted text or error message.
-    """
-    if ocr is None:
-        error_msg = "Error: PaddleOCR not initialized. Please check the logs for details."
-        logger.error(error_msg)
-        return error_msg
-    # Validate the image before processing
-    if not validate_image(image_file):
-        error_msg = "Error: Invalid or unsupported image file."
-        logger.error(error_msg)
-        return error_msg
-    try:
-        logger.info(f"Extracting text from image: {image_file}")
-        # Convert image file to a format PaddleOCR can process
-        img = Image.open(image_file)
-        # First attempt with default preprocessing
-        logger.info("Attempt 1: Extracting text with default preprocessing...")
-        img_processed = preprocess_image(img, attempt=1)
-        img_byte_arr = io.BytesIO()
-        img_processed.save(img_byte_arr, format='PNG')
-        img_byte_arr = img_byte_arr.getvalue()
-        # Perform OCR
-        result = ocr.ocr(img_byte_arr, cls=True)
-        # Extract text from OCR result
-        text = ""
-        if result:
-            for line in result:
-                if line:  # Check if line is not None
-                    for word_info in line:
-                        text += word_info[1][0] + "\n"
-        # If text is empty or contains obvious errors, try a second attempt
-        if not text.strip() or len(text.splitlines()) < 5:  # Arbitrary threshold for "too little text"
-            logger.warning("First OCR attempt yielded insufficient text. Trying second attempt with different preprocessing...")
-            img_processed = preprocess_image(img, attempt=2)
-            img_byte_arr = io.BytesIO()
-            img_processed.save(img_byte_arr, format='PNG')
-            img_byte_arr = img_byte_arr.getvalue()
-            # Perform OCR again
-            result = ocr.ocr(img_byte_arr, cls=True)
-            # Extract text from second attempt
-            text = ""
-            if result:
-                for line in result:
-                    if line:  # Check if line is not None
-                        for word_info in line:
-                            text += word_info[1][0] + "\n"
-        logger.info("Successfully extracted text from image.")
-        logger.debug(f"Extracted text:\n{text}")
-        return text.strip()
-    except MemoryError as e:
-        error_msg = f"Error: Insufficient memory to process the image: {str(e)}"
-        logger.error(error_msg)
-        return error_msg
-    except Exception as e:
-        error_msg = f"Error extracting text from image: {str(e)}"
-        logger.error(error_msg)
-        return error_msg

image_ocr.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import easyocr
+import logging
+# Raise the "easyocr" logger's threshold to ERROR so that lower-severity
+# records sent to that logger are dropped.
+logging.getLogger("easyocr").setLevel(logging.ERROR)
+def extract_text_from_image(image_path):
+    """Extract text from an image using EasyOCR."""
+    try:
+        # Initialize EasyOCR reader (English language, CPU mode)
+        reader = easyocr.Reader(['en'], gpu=False)
+        # Read text from the image
+        results = reader.readtext(image_path, detail=0, paragraph=True)
+        # Combine the extracted text into a single string
+        text = "\n".join(results)
+        print("Extracted text from image:\n", text)  # Debug: Print extracted text
+        return text
+    except Exception as e:
+        return f"Error extracting text from image: {str(e)}"