sunbal7 committed on
Commit
26a8a16
·
verified ·
1 Parent(s): 4da3e5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +369 -150
app.py CHANGED
@@ -1,11 +1,20 @@
1
  import streamlit as st
2
  import cv2
3
  import numpy as np
4
- import pytesseract
5
  from PIL import Image
6
  import tempfile
7
  import os
8
  import io
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Page configuration
11
  st.set_page_config(
@@ -14,23 +23,60 @@ st.set_page_config(
14
  layout="centered"
15
  )
16
 
17
- # Title and description
18
- st.title("πŸ“„ Intelligent Document Scanner & OCR")
19
- st.markdown("Upload a photo of a document to get a cleaned, flattened version with extracted text.")
20
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Initialize session state for processed images
23
  if 'processed_image' not in st.session_state:
24
  st.session_state.processed_image = None
25
  if 'extracted_text' not in st.session_state:
26
  st.session_state.extracted_text = ""
27
  if 'original_image' not in st.session_state:
28
  st.session_state.original_image = None
 
 
29
 
30
  def preprocess_image(image):
31
  """Preprocess image for better edge detection"""
32
  # Convert to grayscale
33
- gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
 
 
34
 
35
  # Apply CLAHE for better contrast
36
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
@@ -41,66 +87,115 @@ def preprocess_image(image):
41
 
42
  return blurred
43
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def find_document_contour(image):
45
  """Find the document contour in the image"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # Edge detection
47
- edges = cv2.Canny(image, 50, 150)
48
 
49
- # Morphological operations to close gaps
50
  kernel = np.ones((5,5), np.uint8)
51
  edges = cv2.dilate(edges, kernel, iterations=1)
52
  edges = cv2.erode(edges, kernel, iterations=1)
53
 
54
  # Find contours
55
- contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
56
 
57
  # Sort contours by area and get the largest ones
58
- contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
 
 
 
59
 
60
- # Approximate the contour
61
  for contour in contours:
 
62
  perimeter = cv2.arcLength(contour, True)
63
  approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
64
 
65
- # If we found a quadrilateral
 
66
  if len(approx) == 4:
67
- return approx
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- return None
70
 
71
  def order_points(pts):
72
  """Reorder points to consistent order: top-left, top-right, bottom-right, bottom-left"""
73
  rect = np.zeros((4, 2), dtype="float32")
74
 
75
- # Sum and difference
 
76
  s = pts.sum(axis=1)
77
  rect[0] = pts[np.argmin(s)] # top-left
78
  rect[2] = pts[np.argmax(s)] # bottom-right
79
 
 
80
  diff = np.diff(pts, axis=1)
81
  rect[1] = pts[np.argmin(diff)] # top-right
82
  rect[3] = pts[np.argmax(diff)] # bottom-left
83
 
84
  return rect
85
 
86
- def perspective_transform(image, contour):
87
- """Apply perspective transformation to get bird's eye view"""
88
- # Get the four corners
89
- pts = contour.reshape(4, 2)
90
  rect = order_points(pts)
91
-
92
  (tl, tr, br, bl) = rect
93
 
94
- # Calculate width and height of new image
95
  width_a = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
96
  width_b = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
97
  max_width = max(int(width_a), int(width_b))
98
 
 
99
  height_a = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
100
  height_b = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
101
  max_height = max(int(height_a), int(height_b))
102
 
103
- # Destination points
104
  dst = np.array([
105
  [0, 0],
106
  [max_width - 1, 0],
@@ -108,168 +203,292 @@ def perspective_transform(image, contour):
108
  [0, max_height - 1]
109
  ], dtype="float32")
110
 
111
- # Calculate perspective transform matrix
112
- matrix = cv2.getPerspectiveTransform(rect, dst)
113
 
114
- # Apply perspective transform
115
- warped = cv2.warpPerspective(image, matrix, (max_width, max_height))
116
 
117
  return warped
118
 
119
- def process_document(image_array):
120
- """Main processing pipeline"""
121
- # Create a copy for processing
122
- image = image_array.copy()
123
-
124
- # Preprocess
125
- processed = preprocess_image(image)
126
-
127
- # Find document contour
128
- contour = find_document_contour(processed)
129
-
130
- if contour is None:
131
- st.warning("Could not detect document edges. Showing original image.")
132
- return image, ""
133
 
134
- # Draw contour on original image
135
- contour_image = image.copy()
136
- cv2.drawContours(contour_image, [contour], -1, (0, 255, 0), 3)
137
 
138
- # Apply perspective transform
139
- warped = perspective_transform(image, contour)
140
 
141
- # Convert to grayscale for OCR
142
- warped_gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
 
 
 
143
 
144
- # Apply adaptive thresholding for better OCR
145
- _, warped_binary = cv2.threshold(warped_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
 
 
 
146
 
147
- # Extract text using Tesseract
148
  try:
 
 
 
149
  # Configure Tesseract parameters
150
- custom_config = r'--oem 3 --psm 6 -l eng'
151
- text = pytesseract.image_to_string(warped_binary, config=custom_config)
152
- except:
153
- text = "OCR failed. Please check if Tesseract is properly installed."
154
-
155
- return warped_binary, text
 
 
156
 
157
  def main():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  # File uploader
159
  uploaded_file = st.file_uploader(
160
  "Choose an image file",
161
- type=['jpg', 'jpeg', 'png'],
162
- help="Upload a photo containing a document"
163
  )
164
 
165
  if uploaded_file is not None:
166
- # Read image
167
- image = Image.open(uploaded_file)
168
- st.session_state.original_image = image
169
-
170
- # Convert to OpenCV format
171
- image_array = np.array(image)
172
-
173
- # Convert RGB to BGR for OpenCV
174
- if len(image_array.shape) == 3:
175
- if image_array.shape[2] == 3: # RGB
176
- image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
177
- elif image_array.shape[2] == 4: # RGBA
178
- image_array = cv2.cvtColor(image_array, cv2.COLOR_RGBA2BGR)
179
-
180
- # Process button
181
- col1, col2, col3 = st.columns([1, 2, 1])
182
- with col2:
183
- process_btn = st.button("πŸ” Process Document", use_container_width=True)
184
-
185
- if process_btn:
186
- with st.spinner("Processing document..."):
187
- # Process the document
188
- processed_image, extracted_text = process_document(image_array)
189
-
190
- # Store in session state
191
- st.session_state.processed_image = processed_image
192
- st.session_state.extracted_text = extracted_text
193
-
194
- # Display results
195
- if st.session_state.processed_image is not None:
196
- st.markdown("---")
197
 
198
- # Create two columns for image display
199
- col1, col2 = st.columns(2)
200
 
201
- with col1:
202
- st.subheader("πŸ“Έ Original Image")
203
- st.image(st.session_state.original_image, use_column_width=True)
 
 
 
204
 
 
 
 
205
  with col2:
206
- st.subheader("πŸ“„ Processed Document")
207
- st.image(st.session_state.processed_image, use_column_width=True, clamp=True)
208
 
209
- # OCR Results
210
  st.markdown("---")
211
- st.subheader("πŸ“ Extracted Text")
212
-
213
- # Text area for extracted text
214
- text_area = st.text_area(
215
- "OCR Results",
216
- st.session_state.extracted_text,
217
- height=200,
218
- label_visibility="collapsed"
219
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- # Download buttons
222
- col1, col2 = st.columns(2)
223
-
224
- with col1:
225
- if st.session_state.processed_image is not None:
226
- # Convert processed image to bytes for download
227
- is_success, buffer = cv2.imencode(".png", st.session_state.processed_image)
228
- if is_success:
229
- st.download_button(
230
- label="πŸ“₯ Download Processed Image",
231
- data=buffer.tobytes(),
232
- file_name="processed_document.png",
233
- mime="image/png",
234
- use_container_width=True
235
- )
236
-
237
- with col2:
 
 
 
 
 
 
 
 
 
 
238
  if st.session_state.extracted_text:
239
- st.download_button(
240
- label="πŸ“₯ Download Text",
241
- data=st.session_state.extracted_text,
242
- file_name="extracted_text.txt",
243
- mime="text/plain",
244
- use_container_width=True
245
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  else:
248
- # Instructions when no file is uploaded
249
  st.markdown("---")
 
250
  col1, col2, col3 = st.columns([1, 2, 1])
251
  with col2:
252
  st.info("πŸ‘† Please upload an image file to begin")
253
 
254
- # Features list
255
- st.markdown("### Features:")
256
- st.markdown("""
257
- - **Auto-edge detection** using Canny edge detection
258
- - **Perspective correction** using homography
259
- - **Document deskewing** and auto-cropping
260
- - **OCR text extraction** using Tesseract
261
- - **Image enhancement** for better readability
262
- """)
263
 
264
- # Tips
265
- st.markdown("### Tips for best results:")
266
- st.markdown("""
267
- 1. Ensure good lighting when taking the photo
268
- 2. Try to capture the entire document
269
- 3. Keep the document as flat as possible
270
- 4. Avoid shadows on the document
271
- 5. Ensure text is clearly visible
272
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
  if __name__ == "__main__":
275
  main()
 
1
import streamlit as st
import cv2
import numpy as np
from PIL import Image
import tempfile
import os
import io
import subprocess
import sys

# Optional OCR dependency: pytesseract wraps the system Tesseract binary.
# BUG FIX: the original called st.warning() here on ImportError, but Streamlit
# requires st.set_page_config() (below) to be the first Streamlit command —
# issuing a warning first raises StreamlitAPIException and breaks the app
# precisely when pytesseract is missing. Record availability only; the OCR
# helper reports the missing dependency at use time.
try:
    import pytesseract
    TESSERACT_AVAILABLE = True
except ImportError:
    TESSERACT_AVAILABLE = False
 
19
  # Page configuration
20
  st.set_page_config(
 
23
  layout="centered"
24
  )
25
 
26
# Custom CSS for better UI: restyle Streamlit buttons (green, full-width,
# rounded) and define reusable success/warning box classes.
# unsafe_allow_html=True is required to inject a raw <style> tag.
st.markdown("""
<style>
    .stButton > button {
        width: 100%;
        background-color: #4CAF50;
        color: white;
        border: none;
        padding: 10px 24px;
        text-align: center;
        text-decoration: none;
        display: inline-block;
        font-size: 16px;
        margin: 4px 2px;
        cursor: pointer;
        border-radius: 4px;
    }
    .stButton > button:hover {
        background-color: #45a049;
    }
    .success-box {
        padding: 10px;
        background-color: #d4edda;
        border: 1px solid #c3e6cb;
        border-radius: 4px;
        color: #155724;
    }
    .warning-box {
        padding: 10px;
        background-color: #fff3cd;
        border: 1px solid #ffeaa7;
        border-radius: 4px;
        color: #856404;
    }
</style>
""", unsafe_allow_html=True)
62
 
63
# Initialize the session-state slots this app reads across reruns, leaving
# any value from a previous rerun untouched.
_SESSION_DEFAULTS = {
    'processed_image': None,
    'extracted_text': "",
    'original_image': None,
    'contour_image': None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
72
 
73
  def preprocess_image(image):
74
  """Preprocess image for better edge detection"""
75
  # Convert to grayscale
76
+ if len(image.shape) == 3:
77
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
78
+ else:
79
+ gray = image.copy()
80
 
81
  # Apply CLAHE for better contrast
82
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
 
87
 
88
  return blurred
89
 
90
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image itself.

    The lower/upper thresholds are (1 - sigma) and (1 + sigma) times the
    median pixel intensity, clamped to [0, 255], so the detector adapts to
    each image's overall brightness.
    """
    median_intensity = np.median(image)

    lower_threshold = int(max(0, (1.0 - sigma) * median_intensity))
    upper_threshold = int(min(255, (1.0 + sigma) * median_intensity))

    return cv2.Canny(image, lower_threshold, upper_threshold)
101
+
102
def find_document_contour(image):
    """Locate the document's quadrilateral outline in a preprocessed image.

    Args:
        image: single-channel preprocessed image to search.

    Returns:
        An int32 contour with exactly 4 points, in original-image
        coordinates, or None when no quadrilateral outline is found.
    """
    # Downscale large images for faster edge/contour work (aspect preserved).
    height, width = image.shape[:2]
    max_dimension = 800
    scale = 1

    if height > max_dimension or width > max_dimension:
        scale = max_dimension / max(height, width)
        resized = cv2.resize(image, (int(width * scale), int(height * scale)))
    else:
        resized = image.copy()

    # Edge detection with image-adaptive thresholds.
    edges = auto_canny(resized)

    # Close gaps between edge segments (dilation followed by erosion).
    kernel = np.ones((5, 5), np.uint8)
    edges = cv2.dilate(edges, kernel, iterations=1)
    edges = cv2.erode(edges, kernel, iterations=1)

    # Find contours and keep the ten largest by area.
    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

    screen_contour = None

    # First candidate whose polygonal approximation has four vertices is
    # assumed to be the document.
    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) == 4:
            screen_contour = approx
            break

    # Fallback: approximate the largest contour, but accept it only when it
    # really is a quadrilateral. BUG FIX: the original returned this
    # approximation unconditionally; callers reshape the result to (4, 2),
    # so any other vertex count crashed downstream in four_point_transform.
    if screen_contour is None and contours:
        perimeter = cv2.arcLength(contours[0], True)
        approx = cv2.approxPolyDP(contours[0], 0.02 * perimeter, True)
        if len(approx) == 4:
            screen_contour = approx

    # Map the contour back to original-image coordinates if we downscaled.
    if screen_contour is not None and scale != 1:
        screen_contour = (screen_contour / scale).astype(np.int32)

    return screen_contour
164
 
165
def order_points(pts):
    """Return the four corner points ordered TL, TR, BR, BL.

    The top-left corner has the smallest x+y sum and the bottom-right the
    largest; the top-right has the smallest y-x difference and the
    bottom-left the largest.
    """
    ordered = np.zeros((4, 2), dtype="float32")

    corner_sums = pts.sum(axis=1)
    corner_diffs = np.diff(pts, axis=1)

    ordered[0] = pts[np.argmin(corner_sums)]   # top-left
    ordered[2] = pts[np.argmax(corner_sums)]   # bottom-right
    ordered[1] = pts[np.argmin(corner_diffs)]  # top-right
    ordered[3] = pts[np.argmax(corner_diffs)]  # bottom-left

    return ordered
181
 
182
def four_point_transform(image, pts):
    """Warp the quadrilateral `pts` in `image` to a top-down rectangle.

    Args:
        image: source image (BGR or grayscale).
        pts: (4, 2) array of the quadrilateral's corners, in any order.

    Returns:
        The perspective-corrected ("bird's eye view") crop.
    """
    # Canonicalize the corner order: top-left, top-right, bottom-right,
    # bottom-left.
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Output width: the longer of the two horizontal edges.
    bottom_width = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    top_width = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    max_width = max(int(bottom_width), int(top_width))

    # Output height: the longer of the two vertical edges.
    right_height = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    left_height = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    max_height = max(int(right_height), int(left_height))

    # Destination rectangle in the same TL, TR, BR, BL order.
    dst = np.array([
        [0, 0],
        [max_width - 1, 0],
        [max_width - 1, max_height - 1],
        [0, max_height - 1],
    ], dtype="float32")

    # Homography from the source quad to the axis-aligned rectangle.
    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, matrix, (max_width, max_height))
213
 
214
def enhance_image(image):
    """Binarize, denoise, and sharpen an image to improve OCR accuracy."""
    # Work on a single channel.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # Local (adaptive) thresholding copes with uneven lighting.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Remove speckle noise left over from thresholding.
    denoised = cv2.fastNlMeansDenoising(binary, h=10)

    # 3x3 sharpening kernel to crisp up glyph edges for the OCR engine.
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(denoised, -1, sharpen_kernel)
236
+
237
def extract_text_from_image(image):
    """Run Tesseract OCR on `image` and return the recognized text.

    Returns an explanatory message — never raises — when pytesseract is
    unavailable or OCR itself fails.
    """
    if not TESSERACT_AVAILABLE:
        return "OCR feature requires pytesseract. Please install tesseract-ocr on your system."

    try:
        # Enhance first: binarized, denoised input reads far better.
        enhanced = enhance_image(image)

        # --oem 3: default engine mode; --psm 6: assume a uniform text block.
        return pytesseract.image_to_string(enhanced, config=r'--oem 3 --psm 6').strip()
    except Exception as e:
        return f"OCR Error: {str(e)}"
255
 
256
def _finalize_document(source_bgr, enhance_ocr):
    """Finish the pipeline for `source_bgr`: enhance (or just grayscale),
    OCR, and store both results in session state.

    Extracted helper — the original duplicated this exact sequence in the
    contour-found and no-contour fallback branches.
    """
    if enhance_ocr:
        final_image = enhance_image(source_bgr)
    else:
        final_image = cv2.cvtColor(source_bgr, cv2.COLOR_BGR2GRAY)

    st.session_state.processed_image = final_image
    st.session_state.extracted_text = extract_text_from_image(final_image)


def main():
    """Render the Streamlit UI: upload, process, display, and download."""
    # Title and description
    st.title("📄 Intelligent Document Scanner & OCR")
    st.markdown("Upload a photo of a document to get a cleaned, flattened version with extracted text.")
    st.markdown("---")

    # Sidebar for controls
    with st.sidebar:
        st.header("Settings")
        show_contour = st.checkbox("Show detected edges", value=True)
        enhance_ocr = st.checkbox("Enhance for OCR", value=True)

        st.markdown("---")
        st.markdown("### Tips:")
        st.markdown("""
        1. Good lighting is essential
        2. Capture entire document
        3. Avoid shadows
        4. Keep camera parallel to document
        """)

    # File uploader
    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=['jpg', 'jpeg', 'png', 'bmp', 'tiff'],
        help="Supported formats: JPG, PNG, BMP, TIFF"
    )

    if uploaded_file is not None:
        try:
            # Read image; keep the original for side-by-side display.
            image = Image.open(uploaded_file)
            st.session_state.original_image = image

            # Convert to OpenCV format (PIL gives RGB/RGBA; OpenCV wants BGR).
            image_array = np.array(image)
            if len(image_array.shape) == 3:
                if image_array.shape[2] == 3:  # RGB
                    image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
                elif image_array.shape[2] == 4:  # RGBA
                    image_array = cv2.cvtColor(image_array, cv2.COLOR_RGBA2BGR)

            # Display original image
            st.subheader("📸 Original Image")
            col1, col2, col3 = st.columns([1, 2, 1])
            with col2:
                st.image(image, use_column_width=True, caption=f"Size: {image.size[0]}x{image.size[1]}")

            # Process button
            st.markdown("---")
            if st.button("🔍 Process Document", key="process"):
                with st.spinner("Processing..."):
                    # Steps 1-2: preprocess and locate the document outline.
                    processed = preprocess_image(image_array)
                    contour = find_document_contour(processed)

                    # Optional visualization of the detected edges.
                    if show_contour and contour is not None:
                        contour_img = image_array.copy()
                        cv2.drawContours(contour_img, [contour], -1, (0, 255, 0), 3)
                        st.session_state.contour_image = contour_img

                    if contour is not None:
                        # Steps 3-5: flatten, enhance, OCR.
                        warped = four_point_transform(image_array, contour.reshape(4, 2))
                        _finalize_document(warped, enhance_ocr)
                        st.success("✅ Document processed successfully!")
                    else:
                        # No quadrilateral found: process the whole frame
                        # rather than failing outright.
                        st.warning("⚠️ Could not detect document edges. Try adjusting the image or lighting.")
                        _finalize_document(image_array, enhance_ocr)

            # Display results if available
            if st.session_state.processed_image is not None:
                st.markdown("---")
                st.subheader("📄 Processed Results")

                col1, col2 = st.columns(2)

                with col1:
                    if st.session_state.contour_image is not None and show_contour:
                        # Convert BGR to RGB for display
                        contour_rgb = cv2.cvtColor(st.session_state.contour_image, cv2.COLOR_BGR2RGB)
                        st.image(contour_rgb, use_column_width=True, caption="Detected Document Edges")
                    elif st.session_state.original_image is not None:
                        st.image(st.session_state.original_image, use_column_width=True, caption="Original Image")

                with col2:
                    if st.session_state.processed_image is not None:
                        st.image(st.session_state.processed_image,
                                 use_column_width=True,
                                 clamp=True,
                                 caption="Processed & Flattened Document")

                # OCR Results
                st.markdown("---")
                st.subheader("📝 Extracted Text")

                if st.session_state.extracted_text:
                    with st.expander("View Extracted Text", expanded=True):
                        st.text_area("OCR Output",
                                     st.session_state.extracted_text,
                                     height=200,
                                     label_visibility="collapsed")

                    # Text statistics
                    word_count = len(st.session_state.extracted_text.split())
                    char_count = len(st.session_state.extracted_text)
                    st.caption(f"📊 Statistics: {word_count} words, {char_count} characters")

                    # Download buttons
                    st.markdown("---")
                    col1, col2 = st.columns(2)

                    with col1:
                        if st.session_state.processed_image is not None:
                            # Convert to a PIL image so it can be saved as PNG.
                            if len(st.session_state.processed_image.shape) == 2:  # Grayscale
                                pil_img = Image.fromarray(st.session_state.processed_image)
                            else:  # Color
                                rgb_img = cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB)
                                pil_img = Image.fromarray(rgb_img)

                            img_byte_arr = io.BytesIO()
                            pil_img.save(img_byte_arr, format='PNG')
                            img_byte_arr = img_byte_arr.getvalue()

                            st.download_button(
                                label="💾 Download Processed Image",
                                data=img_byte_arr,
                                file_name="processed_document.png",
                                mime="image/png",
                                use_container_width=True
                            )

                    with col2:
                        # Download text
                        if st.session_state.extracted_text:
                            st.download_button(
                                label="💾 Download Text",
                                data=st.session_state.extracted_text,
                                file_name="extracted_text.txt",
                                mime="text/plain",
                                use_container_width=True
                            )
                else:
                    st.info("No text was extracted from the document.")

        except Exception as e:
            st.error(f"Error processing image: {str(e)}")
            st.info("Please try again with a different image.")

    else:
        # Show instructions when no file is uploaded
        st.markdown("---")

        col1, col2, col3 = st.columns([1, 2, 1])
        with col2:
            st.info("👆 Please upload an image file to begin")

        # Sample workflow
        st.markdown("### How it works:")

        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.markdown("""
            **1. Upload**
            Choose a photo of your document
            """)

        with col2:
            st.markdown("""
            **2. Detect**
            Automatically finds document edges
            """)

        with col3:
            st.markdown("""
            **3. Transform**
            Corrects perspective and deskews
            """)

        with col4:
            st.markdown("""
            **4. Extract**
            Performs OCR to get text
            """)

        # Technical details
        with st.expander("Technical Details"):
            st.markdown("""
            **Algorithms Used:**

            - **Edge Detection**: Canny edge detector with adaptive thresholds
            - **Contour Detection**: Finds largest quadrilateral in image
            - **Perspective Correction**: Homography transformation using four-point perspective
            - **Image Enhancement**: Adaptive thresholding, denoising, and sharpening
            - **OCR**: Tesseract OCR engine with optimized preprocessing

            **Tech Stack:**
            - OpenCV for computer vision
            - Tesseract for OCR
            - Streamlit for web interface
            - NumPy for numerical operations
            """)
492
 
493
# Script entry point: launch the Streamlit app UI.
if __name__ == "__main__":
    main()