sunbal7 committed on
Commit
26a8a16
·
verified ·
1 Parent(s): 4da3e5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +369 -150
app.py CHANGED
@@ -1,11 +1,20 @@
1
  import streamlit as st
2
  import cv2
3
  import numpy as np
4
- import pytesseract
5
  from PIL import Image
6
  import tempfile
7
  import os
8
  import io
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Page configuration
11
  st.set_page_config(
@@ -14,23 +23,60 @@ st.set_page_config(
14
  layout="centered"
15
  )
16
 
17
- # Title and description
18
- st.title("πŸ“„ Intelligent Document Scanner & OCR")
19
- st.markdown("Upload a photo of a document to get a cleaned, flattened version with extracted text.")
20
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Initialize session state for processed images
23
  if 'processed_image' not in st.session_state:
24
  st.session_state.processed_image = None
25
  if 'extracted_text' not in st.session_state:
26
  st.session_state.extracted_text = ""
27
  if 'original_image' not in st.session_state:
28
  st.session_state.original_image = None
 
 
29
 
30
  def preprocess_image(image):
31
  """Preprocess image for better edge detection"""
32
  # Convert to grayscale
33
- gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
 
 
 
34
 
35
  # Apply CLAHE for better contrast
36
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
@@ -41,66 +87,115 @@ def preprocess_image(image):
41
 
42
  return blurred
43
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def find_document_contour(image):
45
  """Find the document contour in the image"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # Edge detection
47
- edges = cv2.Canny(image, 50, 150)
48
 
49
- # Morphological operations to close gaps
50
  kernel = np.ones((5,5), np.uint8)
51
  edges = cv2.dilate(edges, kernel, iterations=1)
52
  edges = cv2.erode(edges, kernel, iterations=1)
53
 
54
  # Find contours
55
- contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
56
 
57
  # Sort contours by area and get the largest ones
58
- contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
 
 
 
59
 
60
- # Approximate the contour
61
  for contour in contours:
 
62
  perimeter = cv2.arcLength(contour, True)
63
  approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
64
 
65
- # If we found a quadrilateral
 
66
  if len(approx) == 4:
67
- return approx
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- return None
70
 
71
  def order_points(pts):
72
  """Reorder points to consistent order: top-left, top-right, bottom-right, bottom-left"""
73
  rect = np.zeros((4, 2), dtype="float32")
74
 
75
- # Sum and difference
 
76
  s = pts.sum(axis=1)
77
  rect[0] = pts[np.argmin(s)] # top-left
78
  rect[2] = pts[np.argmax(s)] # bottom-right
79
 
 
80
  diff = np.diff(pts, axis=1)
81
  rect[1] = pts[np.argmin(diff)] # top-right
82
  rect[3] = pts[np.argmax(diff)] # bottom-left
83
 
84
  return rect
85
 
86
- def perspective_transform(image, contour):
87
- """Apply perspective transformation to get bird's eye view"""
88
- # Get the four corners
89
- pts = contour.reshape(4, 2)
90
  rect = order_points(pts)
91
-
92
  (tl, tr, br, bl) = rect
93
 
94
- # Calculate width and height of new image
95
  width_a = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
96
  width_b = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
97
  max_width = max(int(width_a), int(width_b))
98
 
 
99
  height_a = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
100
  height_b = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
101
  max_height = max(int(height_a), int(height_b))
102
 
103
- # Destination points
104
  dst = np.array([
105
  [0, 0],
106
  [max_width - 1, 0],
@@ -108,168 +203,292 @@ def perspective_transform(image, contour):
108
  [0, max_height - 1]
109
  ], dtype="float32")
110
 
111
- # Calculate perspective transform matrix
112
- matrix = cv2.getPerspectiveTransform(rect, dst)
113
 
114
- # Apply perspective transform
115
- warped = cv2.warpPerspective(image, matrix, (max_width, max_height))
116
 
117
  return warped
118
 
119
- def process_document(image_array):
120
- """Main processing pipeline"""
121
- # Create a copy for processing
122
- image = image_array.copy()
123
-
124
- # Preprocess
125
- processed = preprocess_image(image)
126
-
127
- # Find document contour
128
- contour = find_document_contour(processed)
129
-
130
- if contour is None:
131
- st.warning("Could not detect document edges. Showing original image.")
132
- return image, ""
133
 
134
- # Draw contour on original image
135
- contour_image = image.copy()
136
- cv2.drawContours(contour_image, [contour], -1, (0, 255, 0), 3)
137
 
138
- # Apply perspective transform
139
- warped = perspective_transform(image, contour)
140
 
141
- # Convert to grayscale for OCR
142
- warped_gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
 
 
 
143
 
144
- # Apply adaptive thresholding for better OCR
145
- _, warped_binary = cv2.threshold(warped_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
 
 
 
146
 
147
- # Extract text using Tesseract
148
  try:
 
 
 
149
  # Configure Tesseract parameters
150
- custom_config = r'--oem 3 --psm 6 -l eng'
151
- text = pytesseract.image_to_string(warped_binary, config=custom_config)
152
- except:
153
- text = "OCR failed. Please check if Tesseract is properly installed."
154
-
155
- return warped_binary, text
 
 
156
 
157
  def main():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  # File uploader
159
  uploaded_file = st.file_uploader(
160
  "Choose an image file",
161
- type=['jpg', 'jpeg', 'png'],
162
- help="Upload a photo containing a document"
163
  )
164
 
165
  if uploaded_file is not None:
166
- # Read image
167
- image = Image.open(uploaded_file)
168
- st.session_state.original_image = image
169
-
170
- # Convert to OpenCV format
171
- image_array = np.array(image)
172
-
173
- # Convert RGB to BGR for OpenCV
174
- if len(image_array.shape) == 3:
175
- if image_array.shape[2] == 3: # RGB
176
- image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
177
- elif image_array.shape[2] == 4: # RGBA
178
- image_array = cv2.cvtColor(image_array, cv2.COLOR_RGBA2BGR)
179
-
180
- # Process button
181
- col1, col2, col3 = st.columns([1, 2, 1])
182
- with col2:
183
- process_btn = st.button("πŸ” Process Document", use_container_width=True)
184
-
185
- if process_btn:
186
- with st.spinner("Processing document..."):
187
- # Process the document
188
- processed_image, extracted_text = process_document(image_array)
189
-
190
- # Store in session state
191
- st.session_state.processed_image = processed_image
192
- st.session_state.extracted_text = extracted_text
193
-
194
- # Display results
195
- if st.session_state.processed_image is not None:
196
- st.markdown("---")
197
 
198
- # Create two columns for image display
199
- col1, col2 = st.columns(2)
200
 
201
- with col1:
202
- st.subheader("πŸ“Έ Original Image")
203
- st.image(st.session_state.original_image, use_column_width=True)
 
 
 
204
 
 
 
 
205
  with col2:
206
- st.subheader("πŸ“„ Processed Document")
207
- st.image(st.session_state.processed_image, use_column_width=True, clamp=True)
208
 
209
- # OCR Results
210
  st.markdown("---")
211
- st.subheader("πŸ“ Extracted Text")
212
-
213
- # Text area for extracted text
214
- text_area = st.text_area(
215
- "OCR Results",
216
- st.session_state.extracted_text,
217
- height=200,
218
- label_visibility="collapsed"
219
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- # Download buttons
222
- col1, col2 = st.columns(2)
223
-
224
- with col1:
225
- if st.session_state.processed_image is not None:
226
- # Convert processed image to bytes for download
227
- is_success, buffer = cv2.imencode(".png", st.session_state.processed_image)
228
- if is_success:
229
- st.download_button(
230
- label="πŸ“₯ Download Processed Image",
231
- data=buffer.tobytes(),
232
- file_name="processed_document.png",
233
- mime="image/png",
234
- use_container_width=True
235
- )
236
-
237
- with col2:
 
 
 
 
 
 
 
 
 
 
238
  if st.session_state.extracted_text:
239
- st.download_button(
240
- label="πŸ“₯ Download Text",
241
- data=st.session_state.extracted_text,
242
- file_name="extracted_text.txt",
243
- mime="text/plain",
244
- use_container_width=True
245
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  else:
248
- # Instructions when no file is uploaded
249
  st.markdown("---")
 
250
  col1, col2, col3 = st.columns([1, 2, 1])
251
  with col2:
252
  st.info("πŸ‘† Please upload an image file to begin")
253
 
254
- # Features list
255
- st.markdown("### Features:")
256
- st.markdown("""
257
- - **Auto-edge detection** using Canny edge detection
258
- - **Perspective correction** using homography
259
- - **Document deskewing** and auto-cropping
260
- - **OCR text extraction** using Tesseract
261
- - **Image enhancement** for better readability
262
- """)
263
 
264
- # Tips
265
- st.markdown("### Tips for best results:")
266
- st.markdown("""
267
- 1. Ensure good lighting when taking the photo
268
- 2. Try to capture the entire document
269
- 3. Keep the document as flat as possible
270
- 4. Avoid shadows on the document
271
- 5. Ensure text is clearly visible
272
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
  if __name__ == "__main__":
275
  main()
 
1
import streamlit as st
import cv2
import numpy as np
from PIL import Image
import tempfile
import os
import io
import subprocess
import sys

# Optional OCR dependency: pytesseract wraps the system Tesseract binary.
# BUG FIX: the original called st.warning() here on ImportError, but Streamlit
# requires st.set_page_config() (below) to be the first Streamlit command —
# issuing a warning first raises StreamlitAPIException and breaks the app
# precisely when pytesseract is missing. Record availability only; the OCR
# helper reports the missing dependency at use time.
try:
    import pytesseract
    TESSERACT_AVAILABLE = True
except ImportError:
    TESSERACT_AVAILABLE = False
 
19
  # Page configuration
20
  st.set_page_config(
 
23
  layout="centered"
24
  )
25
 
26
# Custom CSS for better UI: restyle Streamlit buttons (green, full-width,
# rounded) and define reusable success/warning box classes.
# unsafe_allow_html=True is required to inject a raw <style> tag.
st.markdown("""
<style>
    .stButton > button {
        width: 100%;
        background-color: #4CAF50;
        color: white;
        border: none;
        padding: 10px 24px;
        text-align: center;
        text-decoration: none;
        display: inline-block;
        font-size: 16px;
        margin: 4px 2px;
        cursor: pointer;
        border-radius: 4px;
    }
    .stButton > button:hover {
        background-color: #45a049;
    }
    .success-box {
        padding: 10px;
        background-color: #d4edda;
        border: 1px solid #c3e6cb;
        border-radius: 4px;
        color: #155724;
    }
    .warning-box {
        padding: 10px;
        background-color: #fff3cd;
        border: 1px solid #ffeaa7;
        border-radius: 4px;
        color: #856404;
    }
</style>
""", unsafe_allow_html=True)
62
 
63
# Initialize the session-state slots this app reads across reruns, leaving
# any value from a previous rerun untouched.
_SESSION_DEFAULTS = {
    'processed_image': None,
    'extracted_text': "",
    'original_image': None,
    'contour_image': None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
72
 
73
  def preprocess_image(image):
74
  """Preprocess image for better edge detection"""
75
  # Convert to grayscale
76
+ if len(image.shape) == 3:
77
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
78
+ else:
79
+ gray = image.copy()
80
 
81
  # Apply CLAHE for better contrast
82
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
 
87
 
88
  return blurred
89
 
90
def auto_canny(image, sigma=0.33):
    """Run Canny edge detection with thresholds derived from the image itself.

    The lower/upper thresholds are (1 - sigma) and (1 + sigma) times the
    median pixel intensity, clamped to [0, 255], so the detector adapts to
    each image's overall brightness.
    """
    median_intensity = np.median(image)

    lower_threshold = int(max(0, (1.0 - sigma) * median_intensity))
    upper_threshold = int(min(255, (1.0 + sigma) * median_intensity))

    return cv2.Canny(image, lower_threshold, upper_threshold)
101
+
102
def find_document_contour(image):
    """Locate the document's quadrilateral outline in a preprocessed image.

    Args:
        image: single-channel preprocessed image to search.

    Returns:
        An int32 contour with exactly 4 points, in original-image
        coordinates, or None when no quadrilateral outline is found.
    """
    # Downscale large images for faster edge/contour work (aspect preserved).
    height, width = image.shape[:2]
    max_dimension = 800
    scale = 1

    if height > max_dimension or width > max_dimension:
        scale = max_dimension / max(height, width)
        resized = cv2.resize(image, (int(width * scale), int(height * scale)))
    else:
        resized = image.copy()

    # Edge detection with image-adaptive thresholds.
    edges = auto_canny(resized)

    # Close gaps between edge segments (dilation followed by erosion).
    kernel = np.ones((5, 5), np.uint8)
    edges = cv2.dilate(edges, kernel, iterations=1)
    edges = cv2.erode(edges, kernel, iterations=1)

    # Find contours and keep the ten largest by area.
    contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

    screen_contour = None

    # First candidate whose polygonal approximation has four vertices is
    # assumed to be the document.
    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
        if len(approx) == 4:
            screen_contour = approx
            break

    # Fallback: approximate the largest contour, but accept it only when it
    # really is a quadrilateral. BUG FIX: the original returned this
    # approximation unconditionally; callers reshape the result to (4, 2),
    # so any other vertex count crashed downstream in four_point_transform.
    if screen_contour is None and contours:
        perimeter = cv2.arcLength(contours[0], True)
        approx = cv2.approxPolyDP(contours[0], 0.02 * perimeter, True)
        if len(approx) == 4:
            screen_contour = approx

    # Map the contour back to original-image coordinates if we downscaled.
    if screen_contour is not None and scale != 1:
        screen_contour = (screen_contour / scale).astype(np.int32)

    return screen_contour
164
 
165
def order_points(pts):
    """Return the four corner points ordered TL, TR, BR, BL.

    The top-left corner has the smallest x+y sum and the bottom-right the
    largest; the top-right has the smallest y-x difference and the
    bottom-left the largest.
    """
    ordered = np.zeros((4, 2), dtype="float32")

    corner_sums = pts.sum(axis=1)
    corner_diffs = np.diff(pts, axis=1)

    ordered[0] = pts[np.argmin(corner_sums)]   # top-left
    ordered[2] = pts[np.argmax(corner_sums)]   # bottom-right
    ordered[1] = pts[np.argmin(corner_diffs)]  # top-right
    ordered[3] = pts[np.argmax(corner_diffs)]  # bottom-left

    return ordered
181
 
182
def four_point_transform(image, pts):
    """Warp the quadrilateral `pts` in `image` to a top-down rectangle.

    Args:
        image: source image (BGR or grayscale).
        pts: (4, 2) array of the quadrilateral's corners, in any order.

    Returns:
        The perspective-corrected ("bird's eye view") crop.
    """
    # Canonicalize the corner order: top-left, top-right, bottom-right,
    # bottom-left.
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Output width: the longer of the two horizontal edges.
    bottom_width = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    top_width = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    max_width = max(int(bottom_width), int(top_width))

    # Output height: the longer of the two vertical edges.
    right_height = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    left_height = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    max_height = max(int(right_height), int(left_height))

    # Destination rectangle in the same TL, TR, BR, BL order.
    dst = np.array([
        [0, 0],
        [max_width - 1, 0],
        [max_width - 1, max_height - 1],
        [0, max_height - 1],
    ], dtype="float32")

    # Homography from the source quad to the axis-aligned rectangle.
    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, matrix, (max_width, max_height))
213
 
214
def enhance_image(image):
    """Binarize, denoise, and sharpen an image to improve OCR accuracy."""
    # Work on a single channel.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # Local (adaptive) thresholding copes with uneven lighting.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Remove speckle noise left over from thresholding.
    denoised = cv2.fastNlMeansDenoising(binary, h=10)

    # 3x3 sharpening kernel to crisp up glyph edges for the OCR engine.
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(denoised, -1, sharpen_kernel)
236
+
237
def extract_text_from_image(image):
    """Run Tesseract OCR on `image` and return the recognized text.

    Returns an explanatory message — never raises — when pytesseract is
    unavailable or OCR itself fails.
    """
    if not TESSERACT_AVAILABLE:
        return "OCR feature requires pytesseract. Please install tesseract-ocr on your system."

    try:
        # Enhance first: binarized, denoised input reads far better.
        enhanced = enhance_image(image)

        # --oem 3: default engine mode; --psm 6: assume a uniform text block.
        return pytesseract.image_to_string(enhanced, config=r'--oem 3 --psm 6').strip()
    except Exception as e:
        return f"OCR Error: {str(e)}"
255
 
256
def _finalize_document(source_bgr, enhance_ocr):
    """Finish the pipeline for `source_bgr`: enhance (or just grayscale),
    OCR, and store both results in session state.

    Extracted helper — the original duplicated this exact sequence in the
    contour-found and no-contour fallback branches.
    """
    if enhance_ocr:
        final_image = enhance_image(source_bgr)
    else:
        final_image = cv2.cvtColor(source_bgr, cv2.COLOR_BGR2GRAY)

    st.session_state.processed_image = final_image
    st.session_state.extracted_text = extract_text_from_image(final_image)


def main():
    """Render the Streamlit UI: upload, process, display, and download."""
    # Title and description
    st.title("📄 Intelligent Document Scanner & OCR")
    st.markdown("Upload a photo of a document to get a cleaned, flattened version with extracted text.")
    st.markdown("---")

    # Sidebar for controls
    with st.sidebar:
        st.header("Settings")
        show_contour = st.checkbox("Show detected edges", value=True)
        enhance_ocr = st.checkbox("Enhance for OCR", value=True)

        st.markdown("---")
        st.markdown("### Tips:")
        st.markdown("""
        1. Good lighting is essential
        2. Capture entire document
        3. Avoid shadows
        4. Keep camera parallel to document
        """)

    # File uploader
    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=['jpg', 'jpeg', 'png', 'bmp', 'tiff'],
        help="Supported formats: JPG, PNG, BMP, TIFF"
    )

    if uploaded_file is not None:
        try:
            # Read image; keep the original for side-by-side display.
            image = Image.open(uploaded_file)
            st.session_state.original_image = image

            # Convert to OpenCV format (PIL gives RGB/RGBA; OpenCV wants BGR).
            image_array = np.array(image)
            if len(image_array.shape) == 3:
                if image_array.shape[2] == 3:  # RGB
                    image_array = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
                elif image_array.shape[2] == 4:  # RGBA
                    image_array = cv2.cvtColor(image_array, cv2.COLOR_RGBA2BGR)

            # Display original image
            st.subheader("📸 Original Image")
            col1, col2, col3 = st.columns([1, 2, 1])
            with col2:
                st.image(image, use_column_width=True, caption=f"Size: {image.size[0]}x{image.size[1]}")

            # Process button
            st.markdown("---")
            if st.button("🔍 Process Document", key="process"):
                with st.spinner("Processing..."):
                    # Steps 1-2: preprocess and locate the document outline.
                    processed = preprocess_image(image_array)
                    contour = find_document_contour(processed)

                    # Optional visualization of the detected edges.
                    if show_contour and contour is not None:
                        contour_img = image_array.copy()
                        cv2.drawContours(contour_img, [contour], -1, (0, 255, 0), 3)
                        st.session_state.contour_image = contour_img

                    if contour is not None:
                        # Steps 3-5: flatten, enhance, OCR.
                        warped = four_point_transform(image_array, contour.reshape(4, 2))
                        _finalize_document(warped, enhance_ocr)
                        st.success("✅ Document processed successfully!")
                    else:
                        # No quadrilateral found: process the whole frame
                        # rather than failing outright.
                        st.warning("⚠️ Could not detect document edges. Try adjusting the image or lighting.")
                        _finalize_document(image_array, enhance_ocr)

            # Display results if available
            if st.session_state.processed_image is not None:
                st.markdown("---")
                st.subheader("📄 Processed Results")

                col1, col2 = st.columns(2)

                with col1:
                    if st.session_state.contour_image is not None and show_contour:
                        # Convert BGR to RGB for display
                        contour_rgb = cv2.cvtColor(st.session_state.contour_image, cv2.COLOR_BGR2RGB)
                        st.image(contour_rgb, use_column_width=True, caption="Detected Document Edges")
                    elif st.session_state.original_image is not None:
                        st.image(st.session_state.original_image, use_column_width=True, caption="Original Image")

                with col2:
                    if st.session_state.processed_image is not None:
                        st.image(st.session_state.processed_image,
                                 use_column_width=True,
                                 clamp=True,
                                 caption="Processed & Flattened Document")

                # OCR Results
                st.markdown("---")
                st.subheader("📝 Extracted Text")

                if st.session_state.extracted_text:
                    with st.expander("View Extracted Text", expanded=True):
                        st.text_area("OCR Output",
                                     st.session_state.extracted_text,
                                     height=200,
                                     label_visibility="collapsed")

                    # Text statistics
                    word_count = len(st.session_state.extracted_text.split())
                    char_count = len(st.session_state.extracted_text)
                    st.caption(f"📊 Statistics: {word_count} words, {char_count} characters")

                    # Download buttons
                    st.markdown("---")
                    col1, col2 = st.columns(2)

                    with col1:
                        if st.session_state.processed_image is not None:
                            # Convert to a PIL image so it can be saved as PNG.
                            if len(st.session_state.processed_image.shape) == 2:  # Grayscale
                                pil_img = Image.fromarray(st.session_state.processed_image)
                            else:  # Color
                                rgb_img = cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB)
                                pil_img = Image.fromarray(rgb_img)

                            img_byte_arr = io.BytesIO()
                            pil_img.save(img_byte_arr, format='PNG')
                            img_byte_arr = img_byte_arr.getvalue()

                            st.download_button(
                                label="💾 Download Processed Image",
                                data=img_byte_arr,
                                file_name="processed_document.png",
                                mime="image/png",
                                use_container_width=True
                            )

                    with col2:
                        # Download text
                        if st.session_state.extracted_text:
                            st.download_button(
                                label="💾 Download Text",
                                data=st.session_state.extracted_text,
                                file_name="extracted_text.txt",
                                mime="text/plain",
                                use_container_width=True
                            )
                else:
                    st.info("No text was extracted from the document.")

        except Exception as e:
            st.error(f"Error processing image: {str(e)}")
            st.info("Please try again with a different image.")

    else:
        # Show instructions when no file is uploaded
        st.markdown("---")

        col1, col2, col3 = st.columns([1, 2, 1])
        with col2:
            st.info("👆 Please upload an image file to begin")

        # Sample workflow
        st.markdown("### How it works:")

        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.markdown("""
            **1. Upload**
            Choose a photo of your document
            """)

        with col2:
            st.markdown("""
            **2. Detect**
            Automatically finds document edges
            """)

        with col3:
            st.markdown("""
            **3. Transform**
            Corrects perspective and deskews
            """)

        with col4:
            st.markdown("""
            **4. Extract**
            Performs OCR to get text
            """)

        # Technical details
        with st.expander("Technical Details"):
            st.markdown("""
            **Algorithms Used:**

            - **Edge Detection**: Canny edge detector with adaptive thresholds
            - **Contour Detection**: Finds largest quadrilateral in image
            - **Perspective Correction**: Homography transformation using four-point perspective
            - **Image Enhancement**: Adaptive thresholding, denoising, and sharpening
            - **OCR**: Tesseract OCR engine with optimized preprocessing

            **Tech Stack:**
            - OpenCV for computer vision
            - Tesseract for OCR
            - Streamlit for web interface
            - NumPy for numerical operations
            """)
492
 
493
# Script entry point: launch the Streamlit app UI.
if __name__ == "__main__":
    main()