Abhisesh7 committed on
Commit
c104c8e
·
verified ·
1 Parent(s): f3645fd

Rename image_extraction.py to image_ocr.py

Browse files
Files changed (2) hide show
  1. image_extraction.py +0 -198
  2. image_ocr.py +21 -0
image_extraction.py DELETED
@@ -1,198 +0,0 @@
1
- from paddleocr import PaddleOCR
2
- from PIL import Image, ImageEnhance, ImageFilter
3
- import io
4
- import logging
5
- import time
6
- import os
7
- import numpy as np
8
-
9
- # Set up logging
10
- logging.basicConfig(level=logging.DEBUG)
11
- logger = logging.getLogger(__name__)
12
-
13
- # Initialize PaddleOCR with retries
14
def initialize_paddle_ocr(max_retries=3, retry_delay=5):
    """
    Initialize PaddleOCR with retry logic for downloading models.

    Every attempt constructs a fresh ``PaddleOCR`` object with the same
    fixed, detection-friendly settings. Any exception raised during
    construction is logged and, unless it was the last attempt, followed
    by a pause before the next try.

    Args:
        max_retries (int): Maximum number of construction attempts.
        retry_delay (int | float): Seconds to sleep between attempts.

    Returns:
        PaddleOCR instance or None if initialization fails.
    """
    for attempt in range(1, max_retries + 1):
        try:
            logger.info("Initializing PaddleOCR (Attempt %s/%s)...", attempt, max_retries)
            engine = PaddleOCR(
                use_angle_cls=True,
                lang='en',
                use_gpu=False,
                show_log=False,  # Suppress PaddleOCR logs to reduce noise
                det_max_side_len=3500,  # Increase max side length for better detection
                rec_batch_num=1,  # Process one image at a time for stability
                det_db_score_mode='slow',  # Use most accurate detection
                det_db_box_thresh=0.2,  # Lower threshold for better text detection
                det_db_unclip_ratio=3.5,  # Increase ratio for better text region detection
                drop_score=0.1,  # Lower drop score to retain more text
                det_db_thresh=0.1,  # Lower threshold for detection
            )
            logger.info("PaddleOCR initialized successfully.")
            return engine
        except Exception as e:
            # Broad on purpose: any construction failure is treated as retryable.
            logger.warning("PaddleOCR initialization failed: %s", e)
            if attempt < max_retries:
                logger.info("Retrying in %s seconds...", retry_delay)
                time.sleep(retry_delay)

    # Reached when every attempt failed (or when max_retries < 1).
    logger.error("Failed to initialize PaddleOCR after all retries.")
    return None
50
-
51
- # Initialize PaddleOCR at module level
52
- ocr = initialize_paddle_ocr()
53
-
54
def preprocess_image(img, attempt=1):
    """
    Preprocess an image for OCR using one of two fixed parameter sets.

    The pipeline is: cap the size, convert to grayscale, boost contrast,
    sharpen, median-filter, binarize with a fixed global threshold, and
    apply a max filter. The caller's image is not modified; all work is
    done on a copy.

    Args:
        img (PIL.Image): Input image.
        attempt (int): Preprocessing attempt number. 1 selects the first
            parameter set; any other value selects the second.

    Returns:
        PIL.Image: Preprocessed image. If a step raises, the error is
        logged and the image as it stood before the failing step is
        returned.
    """
    first_pass = attempt == 1
    try:
        # Image.thumbnail() resizes in place, so operate on a copy to avoid
        # silently shrinking the image object owned by the caller.
        img = img.copy()

        # Cap the size at 3000x3000; thumbnail() is used here only to bound
        # the dimensions while keeping the aspect ratio.
        img.thumbnail((3000, 3000), Image.Resampling.LANCZOS)

        # Convert to grayscale
        img = img.convert('L')

        # Increase contrast
        img = ImageEnhance.Contrast(img).enhance(5.0 if first_pass else 3.0)

        # Sharpen the image
        img = img.filter(ImageFilter.SHARPEN)

        # Reduce noise with a median filter
        img = img.filter(ImageFilter.MedianFilter(size=5 if first_pass else 3))

        # Binarize with a fixed global threshold (different per attempt)
        thresh = 120 if first_pass else 150
        binary = np.where(np.array(img) > thresh, 255, 0).astype(np.uint8)
        img = Image.fromarray(binary)

        # Max filter, intended to connect broken characters.
        # NOTE(review): MaxFilter picks the brightest pixel in each window, so
        # on dark-text/light-background input it thins strokes rather than
        # joining them - confirm the intended polarity.
        img = img.filter(ImageFilter.MaxFilter(size=3 if first_pass else 5))

        return img
    except Exception as e:
        logger.error(f"Failed to preprocess image (Attempt {attempt}): {str(e)}")
        return img
96
-
97
def validate_image(image_file):
    """
    Validate the image file before processing.

    Checks that the file can be opened and verified by Pillow, that its
    format is one of the accepted ones, and that neither dimension exceeds
    5000 pixels.

    Args:
        image_file (str): Path to the image file.

    Returns:
        bool: True if valid, False otherwise. Any exception raised while
        opening or verifying the file is logged and reported as False.
    """
    supported_formats = ('PNG', 'JPEG', 'JPG')
    max_size = (5000, 5000)  # Max width, height

    try:
        # Context managers ensure the underlying file handles are released.
        with Image.open(image_file) as img:
            img.verify()  # Verify the image is not corrupted

        # Reopen after verify(): the verified image object must not be reused.
        with Image.open(image_file) as img:
            image_format = img.format
            width, height = img.size

        # Check image format
        if image_format not in supported_formats:
            logger.warning(f"Unsupported image format: {image_format}. Supported formats: PNG, JPEG, JPG.")
            return False

        # Check image size (avoid very large images that might cause memory issues)
        if width > max_size[0] or height > max_size[1]:
            logger.warning(f"Image size {(width, height)} exceeds maximum allowed size {max_size}.")
            return False

        return True
    except Exception as e:
        logger.error(f"Image validation failed: {str(e)}")
        return False
124
-
125
def _ocr_preprocessed(img, attempt):
    """Preprocess *img* for the given attempt, run OCR, and return one recognized entry per line."""
    img_processed = preprocess_image(img, attempt=attempt)

    # Hand PaddleOCR the image as PNG-encoded bytes.
    buffer = io.BytesIO()
    img_processed.save(buffer, format='PNG')
    result = ocr.ocr(buffer.getvalue(), cls=True)

    # Each non-empty element of the result is a list of detections whose
    # recognized string sits at word_info[1][0].
    entries = []
    for line in result or []:
        if line:  # Skip None / empty entries
            entries.extend(word_info[1][0] for word_info in line)
    return "".join(f"{entry}\n" for entry in entries)


def extract_text_from_image(image_file):
    """
    Extract text from an image using PaddleOCR with multiple attempts for accuracy.

    A first OCR pass is run with preprocessing attempt 1. If that yields no
    text or fewer than 5 lines, a second pass is run with preprocessing
    attempt 2; its result is used unless it recognized fewer lines than the
    first pass.

    Args:
        image_file (str): Path to the image file.

    Returns:
        str: Extracted text (stripped), or an error message beginning with
        "Error" when OCR is unavailable, the image is invalid, or
        processing fails.
    """
    if ocr is None:
        error_msg = "Error: PaddleOCR not initialized. Please check the logs for details."
        logger.error(error_msg)
        return error_msg

    # Validate the image before processing
    if not validate_image(image_file):
        error_msg = "Error: Invalid or unsupported image file."
        logger.error(error_msg)
        return error_msg

    try:
        logger.info(f"Extracting text from image: {image_file}")

        # The context manager releases the file handle once both passes are done.
        with Image.open(image_file) as img:
            logger.info("Attempt 1: Extracting text with default preprocessing...")
            text = _ocr_preprocessed(img, attempt=1)

            # If text is empty or suspiciously short, try a second attempt.
            if not text.strip() or len(text.splitlines()) < 5:  # Arbitrary threshold for "too little text"
                logger.warning("First OCR attempt yielded insufficient text. Trying second attempt with different preprocessing...")
                retry_text = _ocr_preprocessed(img, attempt=2)
                # Do not throw away the first pass when the retry did worse.
                if len(retry_text.splitlines()) >= len(text.splitlines()):
                    text = retry_text

        logger.info("Successfully extracted text from image.")
        logger.debug(f"Extracted text:\n{text}")
        return text.strip()
    except MemoryError as e:
        error_msg = f"Error: Insufficient memory to process the image: {str(e)}"
        logger.error(error_msg)
        return error_msg
    except Exception as e:
        error_msg = f"Error extracting text from image: {str(e)}"
        logger.error(error_msg)
        return error_msg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
image_ocr.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import easyocr
2
+ import logging
3
+
4
+ # Set up logging to suppress unnecessary warnings
5
+ logging.getLogger("easyocr").setLevel(logging.ERROR)
6
+
7
# Shared EasyOCR reader, created on first use so importing this module stays cheap.
_reader = None


def _get_reader():
    """Return the shared EasyOCR reader (English, CPU mode), creating it on first use."""
    global _reader
    if _reader is None:
        _reader = easyocr.Reader(['en'], gpu=False)
    return _reader


def extract_text_from_image(image_path):
    """Extract text from an image using EasyOCR.

    The reader is built once and reused across calls instead of being
    constructed for every image.

    Args:
        image_path: Image to read; passed straight to ``Reader.readtext``.

    Returns:
        str: The recognized paragraphs joined with newlines, or a message
        starting with "Error extracting text from image:" if anything
        raises.
    """
    try:
        # detail=0 returns plain strings; paragraph=True groups them into paragraphs.
        results = _get_reader().readtext(image_path, detail=0, paragraph=True)

        # Combine the extracted text into a single string
        text = "\n".join(results)
        logging.getLogger(__name__).debug("Extracted text from image:\n%s", text)
        return text
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"