diff --git "a/ProofCheck/pdf_comparator.py" "b/ProofCheck/pdf_comparator.py"
--- "a/ProofCheck/pdf_comparator.py"
+++ "b/ProofCheck/pdf_comparator.py"
@@ -1,1938 +1,399 @@
-import os
-import cv2
+#!/usr/bin/env python3
+"""
+Gradio PDF Comparison Tool
+Upload two PDF files and get comprehensive analysis including differences, OCR, barcodes, and CMYK analysis.
+"""
+
+import os, sys, re, csv, json, io
+from dataclasses import dataclass
+from typing import List, Tuple, Optional
+import tempfile
+
 import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-import pytesseract
+from PIL import Image, ImageChops, ImageDraw, UnidentifiedImageError
 from pdf2image import convert_from_path
-from pyzbar.pyzbar import decode
-from spellchecker import SpellChecker
-import nltk
-from skimage.metrics import structural_similarity as ssim
-from skimage import color
-import json
-import tempfile
-import shutil
-import re
-import time
-import signal
-import unicodedata
+from skimage.measure import label, regionprops
+from skimage.morphology import dilation, footprint_rectangle
+import gradio as gr
 
-# Safe import for regex with fallback
+# Optional features
 try:
-    import regex as _re
-    _USE_REGEX = True
-except ImportError:
-    import re as _re
-    _USE_REGEX = False
-
-TOKEN_PATTERN = r"(?:\p{L})(?:[\p{L}'-]{1,})" if _USE_REGEX else r"[A-Za-z][A-Za-z'-]{1,}"
+    import pytesseract
+    HAS_OCR = True
+except Exception:
+    pytesseract = None
+    HAS_OCR = False
 
-# Domain whitelist for spell checking
-DOMAIN_WHITELIST = {
-    # units / abbreviations
-    "mg", "mg/g", "ml", "g", "thc", "cbd", "tcm", "mct",
-    # common packaging terms / bilingual words you expect
-    "gouttes", "tennir", "net", "zoom", "tytann", "dome", "drops",
-    # brand or proper names you want to ignore completely
-    "purified", "brands", "tytann", "dome", "drops",
-}
-# lowercase everything in whitelist for comparisons
-DOMAIN_WHITELIST = {w.lower() for w in DOMAIN_WHITELIST}
-
-def _likely_french(token: str) -> bool:
-    """Helper: quick language guess per token"""
-    if _USE_REGEX:
-        # any Latin letter outside ASCII => probably FR (é, è, ç…)
-        return bool(_re.search(r"[\p{Letter}&&\p{Latin}&&[^A-Za-z]]", token))
-    # fallback: any non-ascii letter
-    return any((not ('a' <= c.lower() <= 'z')) and c.isalpha() for c in token)
-
-# Try to import additional barcode libraries
 try:
-    import zxing
-    ZXING_AVAILABLE = True
-except ImportError:
-    ZXING_AVAILABLE = False
-    print("zxing-cpp not available, using pyzbar only")
+    from spellchecker import SpellChecker
+    HAS_SPELLCHECK = True
+except Exception:
+    SpellChecker = None
+    HAS_SPELLCHECK = False
 
 try:
-    from dbr import BarcodeReader
-    DBR_AVAILABLE = True
-    print("Dynamsoft Barcode Reader available")
-except ImportError:
-    DBR_AVAILABLE = False
-    print("Dynamsoft Barcode Reader not available")
+    from pyzbar.pyzbar import decode as zbar_decode
+    HAS_BARCODE = True
+except Exception:
+    zbar_decode = None
+    HAS_BARCODE = False
 
-class TimeoutError(Exception):
-    pass
+# -------------------- Core Data --------------------
+@dataclass
+class Box:  # pixel rectangle; fields are row-first (y1, x1, y2, x2) and `area` is set by the producer (find_diff_boxes passes int(region.area))
+    y1: int; x1: int; y2: int; x2: int; area: int
 
-def timeout_handler(signum, frame):
-    raise TimeoutError("Operation timed out")
+# -------------------- Helpers ----------------------
+def _is_pdf(path: str) -> bool:  # True when the extension of `path` is ".pdf", compared case-insensitively
+    return os.path.splitext(path)[1].lower() == ".pdf"
 
-class PDFComparator:
-    def __init__(self):
-        # Initialize spell checkers for English and French
-        self.english_spellchecker = SpellChecker(language='en')
-        self.french_spellchecker = SpellChecker(language='fr')
-        
-        # Add domain whitelist words to spell checkers
-        for w in DOMAIN_WHITELIST:
-            self.english_spellchecker.word_frequency.add(w)
-            self.french_spellchecker.word_frequency.add(w)
-        
-        # Download required NLTK data
-        try:
-            nltk.data.find('tokenizers/punkt')
-        except LookupError:
-            nltk.download('punkt')
-    
-    def safe_execute(self, func, *args, timeout=30, **kwargs):
-        """Execute a function with timeout protection"""
-        try:
-            # Set timeout signal
-            signal.signal(signal.SIGALRM, timeout_handler)
-            signal.alarm(timeout)
-            
-            # Execute function
-            result = func(*args, **kwargs)
-            
-            # Cancel timeout
-            signal.alarm(0)
-            return result
-            
-        except TimeoutError:
-            print(f"Function {func.__name__} timed out after {timeout} seconds")
-            return None
-        except Exception as e:
-            print(f"Error in {func.__name__}: {str(e)}")
-            return None
-        finally:
-            signal.alarm(0)
-    
-    def validate_pdf(self, pdf_path):
-        """Validate that PDF contains '50 Carroll' using enhanced OCR for tiny fonts"""
-        try:
-            print(f"Validating PDF: {pdf_path}")
-            
-            # Try multiple DPI settings for better tiny font detection
-            dpi_settings = [300, 400, 600, 800]
-            
-            for dpi in dpi_settings:
-                print(f"Trying DPI {dpi} for tiny font detection...")
-                
-                # Convert PDF to images with current DPI
-                images = convert_from_path(pdf_path, dpi=dpi)
-                print(f"Converted PDF to {len(images)} images at {dpi} DPI")
-                
-                for page_num, image in enumerate(images):
-                    print(f"Processing page {page_num + 1} at {dpi} DPI...")
-                    
-                    # Convert PIL image to OpenCV format
-                    opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-                    
-                    # Enhanced preprocessing for tiny fonts
-                    processed_image = self.enhance_image_for_tiny_fonts(opencv_image)
-                    
-                    # Try multiple OCR configurations
-                    ocr_configs = [
-                        '--oem 3 --psm 6',  # Assume uniform block of text
-                        '--oem 3 --psm 8',  # Single word
-                        '--oem 3 --psm 13', # Raw line
-                        '--oem 1 --psm 6',  # Legacy engine
-                        '--oem 3 --psm 3',  # Fully automatic page segmentation
-                    ]
-                    
-                    for config in ocr_configs:
-                        try:
-                            # Perform OCR with current configuration
-                            text = pytesseract.image_to_string(processed_image, config=config)
-                            
-                            # Debug: Show first 300 characters of extracted text
-                            debug_text = text[:300].replace('\n', ' ').replace('\r', ' ')
-                            print(f"Page {page_num + 1} text (DPI {dpi}, config: {config}): '{debug_text}...'")
-                            
-                            # Check for "50 Carroll" with various patterns
-                            patterns = ["50 Carroll", "50 carroll", "50Carroll", "50carroll", "50 Carroll", "50 carroll"]
-                            for pattern in patterns:
-                                if pattern in text or pattern.lower() in text.lower():
-                                    print(f"Found '{pattern}' in page {page_num + 1} (DPI {dpi}, config: {config})")
-                                    return True
-                            
-                        except Exception as ocr_error:
-                            print(f"OCR error with config {config}: {str(ocr_error)}")
-                            continue
-            
-            print("Validation failed: '50 Carroll' not found in any page with any DPI or OCR config")
-            return False
-            
-        except Exception as e:
-            print(f"Error validating PDF: {str(e)}")
-            raise Exception(f"Error validating PDF: {str(e)}")
-    
-    def enhance_image_for_tiny_fonts(self, image):
-        """Enhance image specifically for tiny font OCR"""
-        try:
-            # Convert to grayscale
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
-            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
-            enhanced = clahe.apply(gray)
-            
-            # Apply bilateral filter to reduce noise while preserving edges
-            denoised = cv2.bilateralFilter(enhanced, 9, 75, 75)
-            
-            # Apply unsharp masking to enhance edges
-            gaussian = cv2.GaussianBlur(denoised, (0, 0), 2.0)
-            unsharp_mask = cv2.addWeighted(denoised, 1.5, gaussian, -0.5, 0)
-            
-            # Apply adaptive thresholding
-            thresh = cv2.adaptiveThreshold(unsharp_mask, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
-            
-            # Apply morphological operations to clean up
-            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
-            cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
-            
-            return cleaned
-            
-        except Exception as e:
-            print(f"Error enhancing image for tiny fonts: {str(e)}")
-            return image
-    
-    def extract_text_from_pdf(self, pdf_path):
-        """Extract text from PDF with multi-color text detection."""
-        try:
-            # Try to extract embedded text first
-            embedded_text = ""
-            try:
-                import fitz  # PyMuPDF
-                doc = fitz.open(pdf_path)
-                all_text = []
-                any_text = False
-                for i, page in enumerate(doc):
-                    t = page.get_text()
-                    any_text |= bool(t.strip())
-                    all_text.append({"page": i+1, "text": t, "image": None})
-                doc.close()
-                if any_text:
-                    # render images for color diff/barcode only when needed
-                    images = convert_from_path(pdf_path, dpi=600)
-                    for d, im in zip(all_text, images):
-                        d["image"] = im
-                    return all_text
-            except Exception:
-                pass
+def load_first_page(path: str, dpi: int = 300) -> Image.Image:  # RGB image of PDF page 1 (rendered at `dpi`) or of the image file at `path`
+    if not _is_pdf(path):
+        return Image.open(path).convert("RGB")  # anything without a .pdf extension is opened as an image
+    pages = convert_from_path(path, dpi=dpi, first_page=1, last_page=1)  # render the first page only
+    if pages:
+        return pages[0].convert("RGB")
+    raise ValueError(f"No pages in PDF: {path}")
 
-            # Enhanced OCR path with multi-color text detection
-            print("Extracting text with multi-color detection...")
-            images = convert_from_path(pdf_path, dpi=600)
-            all_text = []
-            
-            for page_num, image in enumerate(images):
-                opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-                
-                # Multi-color text extraction
-                combined_text = self.extract_multi_color_text(opencv_image)
-                
-                all_text.append({
-                    'page': page_num + 1,
-                    'text': combined_text,
-                    'image': image
-                })
-            
-            return all_text
-            
-        except Exception as e:
-            raise Exception(f"Error extracting text from PDF: {str(e)}")
-    
-    def extract_multi_color_text(self, image):
-        """Extract text from image in various colors using multiple preprocessing methods."""
-        try:
-            combined_text = ""
-            
-            # Method 1: Standard black text detection
-            print("Method 1: Standard black text detection")
-            processed_image = self.enhance_image_for_tiny_fonts(image)
-            text1 = self.ocr_with_multiple_configs(processed_image)
-            combined_text += text1 + " "
-            
-            # Method 2: Inverted text detection (for white text on dark background)
-            print("Method 2: Inverted text detection")
-            inverted_image = self.create_inverted_image(image)
-            text2 = self.ocr_with_multiple_configs(inverted_image)
-            combined_text += text2 + " "
-            
-            # Method 3: Color channel separation for colored text
-            print("Method 3: Color channel separation")
-            for channel_name, channel_image in self.extract_color_channels(image):
-                text3 = self.ocr_with_multiple_configs(channel_image)
-                combined_text += text3 + " "
-            
-            # Method 4: Edge-based text detection
-            print("Method 4: Edge-based text detection")
-            edge_image = self.create_edge_enhanced_image(image)
-            text4 = self.ocr_with_multiple_configs(edge_image)
-            combined_text += text4 + " "
-            
-            return combined_text.strip()
-            
-        except Exception as e:
-            print(f"Error in multi-color text extraction: {str(e)}")
-            return ""
-    
-    def create_inverted_image(self, image):
-        """Create inverted image for white text detection."""
-        try:
-            # Convert to grayscale
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Invert the image
-            inverted = cv2.bitwise_not(gray)
-            
-            # Apply CLAHE for better contrast
-            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
-            enhanced = clahe.apply(inverted)
-            
-            # Apply thresholding
-            _, thresh = cv2.threshold(enhanced, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-            
-            return thresh
-            
-        except Exception as e:
-            print(f"Error creating inverted image: {str(e)}")
-            return image
-    
-    def extract_color_channels(self, image):
-        """Extract individual color channels for colored text detection."""
-        try:
-            channels = []
-            
-            # Convert to different color spaces
-            hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
-            lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
-            
-            # Extract individual channels
-            b, g, r = cv2.split(image)
-            h, s, v = cv2.split(hsv)
-            l, a, b_lab = cv2.split(lab)
-            
-            # Create channel images for OCR
-            channel_images = [
-                ("blue", b),
-                ("green", g), 
-                ("red", r),
-                ("hue", h),
-                ("saturation", s),
-                ("value", v),
-                ("lightness", l)
-            ]
-            
-            for name, channel in channel_images:
-                # Apply thresholding to each channel
-                _, thresh = cv2.threshold(channel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-                channels.append((name, thresh))
-            
-            return channels
-            
-        except Exception as e:
-            print(f"Error extracting color channels: {str(e)}")
-            return []
-    
-    def create_edge_enhanced_image(self, image):
-        """Create edge-enhanced image for text detection."""
-        try:
-            # Convert to grayscale
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Apply edge detection
-            edges = cv2.Canny(gray, 50, 150)
-            
-            # Dilate edges to connect text components
-            kernel = np.ones((2, 2), np.uint8)
-            dilated = cv2.dilate(edges, kernel, iterations=1)
-            
-            # Invert to get white text on black background
-            inverted = cv2.bitwise_not(dilated)
-            
-            return inverted
-            
-        except Exception as e:
-            print(f"Error creating edge-enhanced image: {str(e)}")
-            return image
-    
-    def ocr_with_multiple_configs(self, image):
-        """Perform OCR with multiple configurations."""
-        try:
-            ocr_configs = [
-                '--oem 3 --psm 6',  # Assume uniform block of text
-                '--oem 3 --psm 8',  # Single word
-                '--oem 3 --psm 13', # Raw line
-                '--oem 1 --psm 6',  # Legacy engine
-            ]
-            
-            best_text = ""
-            for config in ocr_configs:
-                try:
-                    text = pytesseract.image_to_string(image, config=config)
-                    if len(text.strip()) > len(best_text.strip()):
-                        best_text = text
-                except Exception as ocr_error:
-                    print(f"OCR error with config {config}: {str(ocr_error)}")
-                    continue
-            
-            return best_text
-            
-        except Exception as e:
-            print(f"Error in OCR with multiple configs: {str(e)}")
-            return ""
-    
-    def annotate_spelling_errors_on_image(self, pil_image, misspelled):
-        """
-        Draw one red rectangle around each misspelled token using Tesseract word boxes.
-        'misspelled' must be a list of dicts with 'word' keys (from check_spelling).
-        """
-        if not misspelled:
-            return pil_image
+def match_sizes(a: Image.Image, b: Image.Image) -> Tuple[Image.Image, Image.Image]:  # make both images the same size
+    if a.size != b.size:  # crop both to the shared top-left region; equal sizes are returned untouched
+        common = (0, 0, min(a.width, b.width), min(a.height, b.height))
+        a, b = a.crop(common), b.crop(common)
+    return a, b
 
-        def _norm(s: str) -> str:
-            return unicodedata.normalize("NFKC", s).replace("'","'").strip(".,:;!?)(").lower()
+def difference_map(a: Image.Image, b: Image.Image) -> Image.Image:  # thin wrapper: delegates to Pillow's ImageChops.difference(a, b)
+    return ImageChops.difference(a, b)
 
-        # build a quick lookup of misspelled lowercase words
-        miss_set = {_norm(m["word"]) for m in misspelled}
+def find_diff_boxes(diff_img: Image.Image, threshold: int = 12, min_area: int = 25) -> List[Box]:
+    """Return one Box per connected region of `diff_img` that differs by at least `threshold`.
+
+    A pixel counts as changed when its largest channel value is >= `threshold`. Changed pixels are
+    dilated with a 3x3 footprint and grouped into connected regions; regions whose area is below
+    `min_area` are dropped. Box coordinates are the region's `bbox` and `area` is its `area`, both
+    as reported by skimage's regionprops, so they describe the dilated region.
+    """
+    arr = np.atleast_3d(np.asarray(diff_img))  # single-channel input gains a channel axis instead of failing on axis=2
+    mask = (arr.max(axis=2) >= threshold).astype(np.uint8)
+    mask = dilation(mask, footprint_rectangle((3, 3)))
+    regions = regionprops(label(mask, connectivity=2))
+    return [Box(*r.bbox, int(r.area)) for r in regions if r.area >= min_area]
 
-        # run word-level OCR to get boxes
-        img = pil_image
-        try:
-            data = pytesseract.image_to_data(
-                img,
-                lang="eng+fra",
-                config="--oem 3 --psm 6",
-                output_type=pytesseract.Output.DICT,
-            )
-        except Exception as e:
-            print("image_to_data failed:", e)
-            return img
+def draw_boxes_multi(img: Image.Image, red_boxes: List[Box], cyan_boxes: List[Box], green_boxes: Optional[List[Box]] = None,
+                     width: int = 3, red_labels: Optional[List[int]] = None) -> Image.Image:
+    """Return a copy of `img` with outlines drawn for red (diff), cyan (misspelling) and green (barcode) boxes.
+
+    Every outline is `width` nested one-pixel rectangles growing outward from the box. When
+    `red_labels` is non-empty each red box also gets a white tag with its label, falling back to
+    the box's 1-based index once the labels run out. A box list that is None or empty is skipped.
+    """
+    out = img.copy()
+    d = ImageDraw.Draw(out)
+    def outline(boxes: Optional[List[Box]], colour: Tuple[int, int, int]) -> None:
+        for b in boxes or []:
+            for w in range(width):
+                d.rectangle([b.x1 - w, b.y1 - w, b.x2 + w, b.y2 + w], outline=colour)
+    outline(red_boxes, (255, 0, 0))
+    if red_labels:
+        for idx, b in enumerate(red_boxes or []):
+            text = str(red_labels[idx]) if idx < len(red_labels) else str(idx + 1)  # `text`, not `label`: avoids shadowing skimage's label()
+            tx, ty = max(0, b.x1 + 3), max(0, b.y1 + 3)
+            d.rectangle([tx - 2, ty - 2, tx + 14, ty + 14], fill=(255, 255, 255))  # fixed-size white tag behind the text
+            d.text((tx, ty), text, fill=(0, 0, 0))
+    outline(cyan_boxes, (0, 255, 255))  # drawn after the red boxes and tags, as before
+    outline(green_boxes, (0, 255, 0))
+    return out
 
-        draw = ImageDraw.Draw(img)
-        n = len(data.get("text", []))
-        for i in range(n):
-            word = (data["text"][i] or "").strip()
-            if not word:
-                continue
-            clean = _norm(word)
+def make_red_overlay(a: Image.Image, b: Image.Image) -> Image.Image:  # copy of `a` with every pixel that differs from `b` painted red
+    base, other = np.array(a), np.asarray(b)  # np.array copies, so the pixels of `a` itself are never modified
+    changed = (base != other).any(axis=2)  # True where at least one channel differs
+    base[changed] = [255, 0, 0]
+    return Image.fromarray(base)
 
-            if clean and clean in miss_set:
-                x, y, w, h = data["left"][i], data["top"][i], data["width"][i], data["height"][i]
-                # draw a distinct box for this one word
-                draw.rectangle([x, y, x + w, y + h], outline="red", width=4)
+# -------------------- OCR + Spellcheck -------------
+def normalize_token(token: str) -> str:  # lower-case `token`, keeping only ASCII letters and apostrophes (accented letters are dropped)
+    kept = (ch for ch in token if ch == "'" or (ch.isascii() and ch.isalpha()))
+    return "".join(kept).lower()
 
-        return img
-    
-    def detect_barcodes_qr_codes(self, image):
-        """Detect and decode barcodes and QR codes with timeout protection"""
-        try:
-            print("Starting barcode detection...")
-            start_time = time.time()
-            
-            # Convert PIL image to OpenCV format
-            opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-            
-            all_barcodes = []
-            
-            # Method 1: Basic pyzbar detection (fastest)
-            print("Method 1: Basic pyzbar detection")
-            pyzbar_results = self.detect_with_pyzbar_basic(opencv_image)
-            if pyzbar_results:
-                all_barcodes.extend(pyzbar_results)
-                print(f"Found {len(pyzbar_results)} barcodes with basic pyzbar")
-            
-            # Method 2: Dynamsoft Barcode Reader (if available)
-            if DBR_AVAILABLE:
-                print("Method 2: Dynamsoft Barcode Reader")
-                dbr_results = self.detect_with_dynamsoft(opencv_image)
-                if dbr_results:
-                    all_barcodes.extend(dbr_results)
-                    print(f"Found {len(dbr_results)} barcodes with Dynamsoft")
-            
-            # Method 3: Enhanced preprocessing (always run for better detection)
-            print("Method 3: Enhanced preprocessing")
-            enhanced_results = self.detect_with_enhanced_preprocessing(opencv_image)
-            if enhanced_results:
-                all_barcodes.extend(enhanced_results)
-                print(f"Found {len(enhanced_results)} additional barcodes with enhanced preprocessing")
-            
-            # Method 4: Small barcode detection (always run for better detection)
-            print("Method 4: Small barcode detection")
-            small_results = self.detect_small_barcodes_simple(opencv_image)
-            if small_results:
-                all_barcodes.extend(small_results)
-                print(f"Found {len(small_results)} additional small barcodes")
-            
-            # Remove duplicates
-            unique_barcodes = self.remove_duplicate_barcodes(all_barcodes)
-            
-            # Enhance results
-            enhanced_barcodes = self.enhance_barcode_data(unique_barcodes)
-            
-            elapsed_time = time.time() - start_time
-            print(f"Barcode detection completed in {elapsed_time:.2f} seconds. Found {len(enhanced_barcodes)} unique barcodes.")
-            
-            return enhanced_barcodes
-            
-        except Exception as e:
-            print(f"Error in barcode detection: {str(e)}")
-            return []
-    
-    def detect_with_pyzbar_basic(self, image):
-        """Basic pyzbar detection without complex preprocessing"""
-        results = []
-        
-        try:
-            # Simple grayscale conversion
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Try original image
-            decoded_objects = decode(gray)
-            for obj in decoded_objects:
-                barcode_info = {
-                    'type': obj.type,
-                    'data': obj.data.decode('utf-8', errors='ignore'),
-                    'rect': obj.rect,
-                    'polygon': obj.polygon,
-                    'quality': getattr(obj, 'quality', 0),
-                    'orientation': self.detect_barcode_orientation(obj),
-                    'method': 'pyzbar_basic'
-                }
-                
-                if 'databar' in obj.type.lower():
-                    barcode_info['expanded_data'] = self.parse_databar_expanded(obj.data.decode('utf-8', errors='ignore'))
-                
-                results.append(barcode_info)
-            
-            # Try with simple contrast enhancement
-            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-            enhanced = clahe.apply(gray)
-            decoded_objects = decode(enhanced)
-            
-            for obj in decoded_objects:
-                barcode_info = {
-                    'type': obj.type,
-                    'data': obj.data.decode('utf-8', errors='ignore'),
-                    'rect': obj.rect,
-                    'polygon': obj.polygon,
-                    'quality': getattr(obj, 'quality', 0),
-                    'orientation': self.detect_barcode_orientation(obj),
-                    'method': 'pyzbar_enhanced'
-                }
-                
-                if 'databar' in obj.type.lower():
-                    barcode_info['expanded_data'] = self.parse_databar_expanded(obj.data.decode('utf-8', errors='ignore'))
-                
-                results.append(barcode_info)
-            
-        except Exception as e:
-            print(f"Error in basic pyzbar detection: {str(e)}")
-        
-        return results
-    
-    def detect_with_dynamsoft(self, image):
-        """Detect barcodes using Dynamsoft Barcode Reader"""
-        results = []
-        
-        try:
-            if not DBR_AVAILABLE:
-                return results
-            
-            # Initialize Dynamsoft Barcode Reader
-            reader = BarcodeReader()
-            
-            # Convert OpenCV image to bytes for Dynamsoft
-            success, buffer = cv2.imencode('.png', image)
-            if not success:
-                print("Failed to encode image for Dynamsoft")
-                return results
-            
-            image_bytes = buffer.tobytes()
-            
-            # Decode barcodes
-            text_results = reader.decode_file_stream(image_bytes)
-            
-            for result in text_results:
-                barcode_info = {
-                    'type': result.barcode_format_string,
-                    'data': result.barcode_text,
-                    'rect': type('Rect', (), {
-                        'left': result.localization_result.x1,
-                        'top': result.localization_result.y1,
-                        'width': result.localization_result.x2 - result.localization_result.x1,
-                        'height': result.localization_result.y2 - result.localization_result.y1
-                    })(),
-                    'polygon': [
-                        (result.localization_result.x1, result.localization_result.y1),
-                        (result.localization_result.x2, result.localization_result.y1),
-                        (result.localization_result.x2, result.localization_result.y2),
-                        (result.localization_result.x1, result.localization_result.y2)
-                    ],
-                    'quality': result.confidence,
-                    'orientation': self.detect_barcode_orientation(result),
-                    'method': 'dynamsoft'
-                }
-                
-                # Enhanced DataBar Expanded detection
-                if 'databar' in result.barcode_format_string.lower() or 'expanded' in result.barcode_format_string.lower():
-                    barcode_info['expanded_data'] = self.parse_databar_expanded(result.barcode_text)
-                
-                results.append(barcode_info)
-            
-            print(f"Dynamsoft detected {len(results)} barcodes")
-            
-        except Exception as e:
-            print(f"Error in Dynamsoft detection: {str(e)}")
-        
-        return results
-    
-    def detect_with_enhanced_preprocessing(self, image):
-        """Enhanced preprocessing with limited methods"""
-        results = []
-        
-        try:
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Limited preprocessing methods
-            processed_images = [
-                gray,  # Original
-                cv2.resize(gray, (gray.shape[1] * 3, gray.shape[0] * 3), interpolation=cv2.INTER_CUBIC),  # 3x scale
-                cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2),  # Adaptive threshold
-            ]
-            
-            for i, processed_image in enumerate(processed_images):
-                try:
-                    decoded_objects = decode(processed_image)
-                    
-                    for obj in decoded_objects:
-                        barcode_info = {
-                            'type': obj.type,
-                            'data': obj.data.decode('utf-8', errors='ignore'),
-                            'rect': obj.rect,
-                            'polygon': obj.polygon,
-                            'quality': getattr(obj, 'quality', 0),
-                            'orientation': self.detect_barcode_orientation(obj),
-                            'method': f'enhanced_preprocessing_{i}'
-                        }
-                        
-                        if 'databar' in obj.type.lower():
-                            barcode_info['expanded_data'] = self.parse_databar_expanded(obj.data.decode('utf-8', errors='ignore'))
-                        
-                        results.append(barcode_info)
-                        
-                except Exception as e:
-                    print(f"Error in enhanced preprocessing method {i}: {str(e)}")
-                    continue
-        
-        except Exception as e:
-            print(f"Error in enhanced preprocessing: {str(e)}")
-        
-        return results
-    
-    def detect_small_barcodes_simple(self, image):
-        """Simplified small barcode detection"""
-        results = []
-        
-        try:
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Only try 3x and 4x scaling
-            scale_factors = [3.0, 4.0]
-            
-            for scale in scale_factors:
-                try:
-                    height, width = gray.shape
-                    new_height, new_width = int(height * scale), int(width * scale)
-                    scaled = cv2.resize(gray, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-                    
-                    decoded_objects = decode(scaled)
-                    
-                    for obj in decoded_objects:
-                        # Scale back coordinates
-                        scale_factor = width / new_width
-                        scaled_rect = type('Rect', (), {
-                            'left': int(obj.rect.left * scale_factor),
-                            'top': int(obj.rect.top * scale_factor),
-                            'width': int(obj.rect.width * scale_factor),
-                            'height': int(obj.rect.height * scale_factor)
-                        })()
-                        
-                        barcode_info = {
-                            'type': obj.type,
-                            'data': obj.data.decode('utf-8', errors='ignore'),
-                            'rect': scaled_rect,
-                            'polygon': obj.polygon,
-                            'quality': getattr(obj, 'quality', 0),
-                            'orientation': self.detect_barcode_orientation(obj),
-                            'method': f'small_barcode_{scale}x',
-                            'size_category': 'small'
-                        }
-                        
-                        if 'databar' in obj.type.lower():
-                            barcode_info['expanded_data'] = self.parse_databar_expanded(obj.data.decode('utf-8', errors='ignore'))
-                        
-                        results.append(barcode_info)
-                        
-                except Exception as e:
-                    print(f"Error in small barcode detection at {scale}x: {str(e)}")
-                    continue
-        
-        except Exception as e:
-            print(f"Error in small barcode detection: {str(e)}")
-        
-        return results
-    
-    def preprocess_image_for_ocr(self, image):
-        """Preprocess image for better OCR results"""
-        try:
-            # Convert to grayscale
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Apply different preprocessing techniques
-            
-            # 1. Resize image to improve small text recognition
-            height, width = gray.shape
-            scale_factor = 3.0  # Scale up for better small font recognition
-            new_height, new_width = int(height * scale_factor), int(width * scale_factor)
-            resized = cv2.resize(gray, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-            
-            # 2. Apply Gaussian blur to reduce noise
-            blurred = cv2.GaussianBlur(resized, (1, 1), 0)
-            
-            # 3. Apply adaptive thresholding for better text separation
-            thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
-            
-            # 4. Apply morphological operations to clean up text
-            kernel = np.ones((1, 1), np.uint8)
-            cleaned = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
-            
-            # 5. Apply contrast enhancement
-            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-            enhanced = clahe.apply(cleaned)
-            
-            return enhanced
-            
-        except Exception as e:
-            print(f"Error preprocessing image: {str(e)}")
-            return image  # Return original if preprocessing fails
-    
-    def preprocess_for_barcode_detection(self, image):
-        """Preprocess image with multiple techniques for better barcode detection"""
-        processed_images = [image]  # Start with original
-        
-        try:
-            # Convert to grayscale
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            processed_images.append(gray)
-            
-            # Apply different preprocessing techniques
-            
-            # 1. Contrast enhancement
-            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
-            enhanced = clahe.apply(gray)
-            processed_images.append(enhanced)
-            
-            # 2. Gaussian blur for noise reduction
-            blurred = cv2.GaussianBlur(gray, (3, 3), 0)
-            processed_images.append(blurred)
-            
-            # 3. Adaptive thresholding
-            thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
-            processed_images.append(thresh)
-            
-            # 4. Edge enhancement for better barcode detection
-            kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
-            sharpened = cv2.filter2D(gray, -1, kernel)
-            processed_images.append(sharpened)
-            
-            # 5. Scale up for small barcodes
-            height, width = gray.shape
-            scale_factor = 3.0
-            new_height, new_width = int(height * scale_factor), int(width * scale_factor)
-            scaled = cv2.resize(gray, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-            processed_images.append(scaled)
-            
-        except Exception as e:
-            print(f"Error in barcode preprocessing: {str(e)}")
-        
-        return processed_images
-    
-    def preprocess_for_databar(self, gray_image):
-        """Specialized preprocessing for DataBar Expanded Stacked barcodes"""
-        processed_images = []
-        
-        try:
-            # Original grayscale
-            processed_images.append(gray_image)
-            
-            # 1. High contrast enhancement for DataBar
-            clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
-            enhanced = clahe.apply(gray_image)
-            processed_images.append(enhanced)
-            
-            # 2. Bilateral filter to preserve edges while reducing noise
-            bilateral = cv2.bilateralFilter(gray_image, 9, 75, 75)
-            processed_images.append(bilateral)
-            
-            # 3. Adaptive thresholding with different parameters
-            thresh1 = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, 2)
-            processed_images.append(thresh1)
-            
-            thresh2 = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
-            processed_images.append(thresh2)
-            
-            # 4. Scale up for better DataBar detection
-            height, width = gray_image.shape
-            scale_factors = [2.0, 3.0, 4.0]
-            
-            for scale in scale_factors:
-                new_height, new_width = int(height * scale), int(width * scale)
-                scaled = cv2.resize(gray_image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-                processed_images.append(scaled)
-            
-            # 5. Edge enhancement specifically for DataBar
-            kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
-            sharpened = cv2.filter2D(gray_image, -1, kernel)
-            processed_images.append(sharpened)
-            
-            # 6. Morphological operations for DataBar
-            kernel = np.ones((2, 2), np.uint8)
-            morphed = cv2.morphologyEx(gray_image, cv2.MORPH_CLOSE, kernel)
-            processed_images.append(morphed)
-            
-        except Exception as e:
-            print(f"Error in DataBar preprocessing: {str(e)}")
-        
-        return processed_images
-    
-    def detect_with_transformations(self, image):
-        """Detect barcodes using multiple image transformations"""
-        results = []
-        
-        try:
-            # Try different rotations
-            angles = [0, 90, 180, 270]
-            
-            for angle in angles:
-                if angle == 0:
-                    rotated_image = image
-                else:
-                    height, width = image.shape[:2]
-                    center = (width // 2, height // 2)
-                    rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
-                    rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height))
-                
-                # Try to detect barcodes in rotated image
-                try:
-                    decoded_objects = decode(rotated_image)
-                    
-                    for obj in decoded_objects:
-                        barcode_info = {
-                            'type': obj.type,
-                            'data': obj.data.decode('utf-8', errors='ignore'),
-                            'rect': obj.rect,
-                            'polygon': obj.polygon,
-                            'quality': getattr(obj, 'quality', 0),
-                            'orientation': f"{angle}°",
-                            'method': f'transform_{angle}deg'
-                        }
-                        
-                        # Enhanced DataBar Expanded detection
-                        if 'databar' in obj.type.lower() or 'expanded' in obj.type.lower():
-                            barcode_info['expanded_data'] = self.parse_databar_expanded(obj.data.decode('utf-8', errors='ignore'))
-                        
-                        # Check for multi-stack barcodes
-                        if self.is_multi_stack_barcode(obj, rotated_image):
-                            barcode_info['stack_type'] = self.detect_stack_type(obj, rotated_image)
-                        
-                        results.append(barcode_info)
-                        
-                except Exception as e:
-                    print(f"Error in transformation detection at {angle}°: {str(e)}")
-                    continue
-        
-        except Exception as e:
-            print(f"Error in transformation detection: {str(e)}")
-        
-        return results
-    
-    def detect_small_barcodes(self, image):
-        """Specialized detection for small barcodes and QR codes"""
-        results = []
-        
-        try:
-            # Convert to grayscale
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Apply specialized preprocessing for small barcodes
-            processed_images = self.preprocess_for_small_barcodes(gray)
-            
-            for processed_image in processed_images:
-                try:
-                    decoded_objects = decode(processed_image)
-                    
-                    for obj in decoded_objects:
-                        # Check if this is a small barcode (less than 50x50 pixels)
-                        if obj.rect.width < 50 or obj.rect.height < 50:
-                            barcode_info = {
-                                'type': obj.type,
-                                'data': obj.data.decode('utf-8', errors='ignore'),
-                                'rect': obj.rect,
-                                'polygon': obj.polygon,
-                                'quality': getattr(obj, 'quality', 0),
-                                'orientation': self.detect_barcode_orientation(obj),
-                                'method': 'small_barcode_detection',
-                                'size_category': 'small'
-                            }
-                            
-                            # Enhanced DataBar Expanded detection
-                            if 'databar' in obj.type.lower() or 'expanded' in obj.type.lower():
-                                barcode_info['expanded_data'] = self.parse_databar_expanded(obj.data.decode('utf-8', errors='ignore'))
-                            
-                            # Check for multi-stack barcodes
-                            if self.is_multi_stack_barcode(obj, image):
-                                barcode_info['stack_type'] = self.detect_stack_type(obj, image)
-                            
-                            results.append(barcode_info)
-                            
-                except Exception as e:
-                    print(f"Error in small barcode detection: {str(e)}")
-                    continue
-        
-        except Exception as e:
-            print(f"Error in small barcode preprocessing: {str(e)}")
-        
-        return results
-    
-    def preprocess_for_small_barcodes(self, gray_image):
-        """Specialized preprocessing for small barcodes and QR codes"""
-        processed_images = []
-        
-        try:
-            # Original grayscale
-            processed_images.append(gray_image)
-            
-            # 1. Multiple high-resolution scaling for small barcodes
-            height, width = gray_image.shape
-            scale_factors = [4.0, 5.0, 6.0, 8.0]  # Higher scaling for small barcodes
-            
-            for scale in scale_factors:
-                new_height, new_width = int(height * scale), int(width * scale)
-                scaled = cv2.resize(gray_image, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-                processed_images.append(scaled)
-            
-            # 2. Aggressive contrast enhancement
-            clahe = cv2.createCLAHE(clipLimit=5.0, tileGridSize=(8, 8))
-            enhanced = clahe.apply(gray_image)
-            processed_images.append(enhanced)
-            
-            # 3. Unsharp masking for edge enhancement
-            gaussian = cv2.GaussianBlur(gray_image, (0, 0), 2.0)
-            unsharp = cv2.addWeighted(gray_image, 1.5, gaussian, -0.5, 0)
-            processed_images.append(unsharp)
-            
-            # 4. Multiple thresholding methods
-            # Otsu's thresholding
-            _, otsu = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-            processed_images.append(otsu)
-            
-            # Adaptive thresholding with different parameters
-            adaptive1 = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 2)
-            processed_images.append(adaptive1)
-            
-            adaptive2 = cv2.adaptiveThreshold(gray_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 7, 2)
-            processed_images.append(adaptive2)
-            
-            # 5. Noise reduction with different methods
-            # Bilateral filter
-            bilateral = cv2.bilateralFilter(gray_image, 9, 75, 75)
-            processed_images.append(bilateral)
-            
-            # Median filter
-            median = cv2.medianBlur(gray_image, 3)
-            processed_images.append(median)
-            
-            # 6. Edge detection and enhancement
-            # Sobel edge detection
-            sobel_x = cv2.Sobel(gray_image, cv2.CV_64F, 1, 0, ksize=3)
-            sobel_y = cv2.Sobel(gray_image, cv2.CV_64F, 0, 1, ksize=3)
-            sobel = np.sqrt(sobel_x**2 + sobel_y**2)
-            sobel = np.uint8(sobel * 255 / sobel.max())
-            processed_images.append(sobel)
-            
-            # 7. Morphological operations for small barcode cleanup
-            kernel = np.ones((2, 2), np.uint8)
-            morphed_close = cv2.morphologyEx(gray_image, cv2.MORPH_CLOSE, kernel)
-            processed_images.append(morphed_close)
-            
-            kernel_open = np.ones((1, 1), np.uint8)
-            morphed_open = cv2.morphologyEx(gray_image, cv2.MORPH_OPEN, kernel_open)
-            processed_images.append(morphed_open)
-            
-        except Exception as e:
-            print(f"Error in small barcode preprocessing: {str(e)}")
-        
-        return processed_images
-    
-    def detect_with_high_resolution(self, image):
-        """Detect barcodes using high-resolution processing"""
-        results = []
-        
-        try:
-            # Convert to grayscale
-            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-            
-            # Process at multiple high resolutions
-            height, width = gray.shape
-            resolutions = [
-                (int(width * 3), int(height * 3)),   # 3x resolution
-                (int(width * 4), int(height * 4)),   # 4x resolution
-                (int(width * 6), int(height * 6))    # 6x resolution
-            ]
-            
-            for new_width, new_height in resolutions:
-                try:
-                    # Resize with high-quality interpolation
-                    resized = cv2.resize(gray, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
-                    
-                    # Apply high-resolution preprocessing
-                    processed = self.preprocess_high_resolution(resized)
-                    
-                    # Try to detect barcodes
-                    decoded_objects = decode(processed)
-                    
-                    for obj in decoded_objects:
-                        # Scale back the coordinates to original image size
-                        scale_factor = width / new_width
-                        scaled_rect = type('Rect', (), {
-                            'left': int(obj.rect.left * scale_factor),
-                            'top': int(obj.rect.top * scale_factor),
-                            'width': int(obj.rect.width * scale_factor),
-                            'height': int(obj.rect.height * scale_factor)
-                        })()
-                        
-                        barcode_info = {
-                            'type': obj.type,
-                            'data': obj.data.decode('utf-8', errors='ignore'),
-                            'rect': scaled_rect,
-                            'polygon': obj.polygon,
-                            'quality': getattr(obj, 'quality', 0),
-                            'orientation': self.detect_barcode_orientation(obj),
-                            'method': f'high_res_{new_width}x{new_height}',
-                            'resolution': f'{new_width}x{new_height}'
-                        }
-                        
-                        # Enhanced DataBar Expanded detection
-                        if 'databar' in obj.type.lower() or 'expanded' in obj.type.lower():
-                            barcode_info['expanded_data'] = self.parse_databar_expanded(obj.data.decode('utf-8', errors='ignore'))
-                        
-                        # Check for multi-stack barcodes
-                        if self.is_multi_stack_barcode(obj, image):
-                            barcode_info['stack_type'] = self.detect_stack_type(obj, image)
-                        
-                        results.append(barcode_info)
-                        
-                except Exception as e:
-                    print(f"Error in high-resolution detection at {new_width}x{new_height}: {str(e)}")
-                    continue
-        
-        except Exception as e:
-            print(f"Error in high-resolution detection: {str(e)}")
-        
-        return results
-    
-    def preprocess_high_resolution(self, image):
-        """Preprocessing optimized for high-resolution images"""
-        try:
-            # 1. High-quality noise reduction
-            denoised = cv2.fastNlMeansDenoising(image)
-            
-            # 2. Advanced contrast enhancement
-            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
-            enhanced = clahe.apply(denoised)
-            
-            # 3. Edge-preserving smoothing
-            bilateral = cv2.bilateralFilter(enhanced, 9, 75, 75)
-            
-            # 4. Sharpening
-            kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
-            sharpened = cv2.filter2D(bilateral, -1, kernel)
-            
-            # 5. Adaptive thresholding for high-res
-            thresh = cv2.adaptiveThreshold(sharpened, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
-            
-            return thresh
-            
-        except Exception as e:
-            print(f"Error in high-resolution preprocessing: {str(e)}")
-            return image
-    
-    def detect_barcode_orientation(self, barcode_obj):
-        """Detect the orientation of the barcode"""
-        try:
-            if hasattr(barcode_obj, 'polygon') and len(barcode_obj.polygon) >= 4:
-                # Calculate orientation based on polygon points
-                points = np.array(barcode_obj.polygon)
-                # Calculate the angle of the longest edge
-                edges = []
-                for i in range(4):
-                    p1 = points[i]
-                    p2 = points[(i + 1) % 4]
-                    edge_length = np.linalg.norm(p2 - p1)
-                    angle = np.arctan2(p2[1] - p1[1], p2[0] - p1[0]) * 180 / np.pi
-                    edges.append((edge_length, angle))
-                
-                # Find the longest edge (likely the main barcode direction)
-                longest_edge = max(edges, key=lambda x: x[0])
-                return f"{longest_edge[1]:.1f}°"
-            
-            return "Unknown"
-        except:
-            return "Unknown"
-    
-    def parse_databar_expanded(self, data):
-        """Parse DataBar Expanded barcode data"""
-        try:
-            # DataBar Expanded can contain multiple data fields
-            # Format: [01]12345678901234[3101]123[3102]456
-            parsed_data = {}
-            
-            # Extract GS1 Application Identifiers
-            ai_pattern = r'\[(\d{2,4})\]([^\[]+)'
-            matches = re.findall(ai_pattern, data)
-            
-            for ai, value in matches:
-                parsed_data[f"AI {ai}"] = value
-            
-            # If no AI pattern found, return original data
-            if not parsed_data:
-                parsed_data["Raw Data"] = data
-            
-            return parsed_data
-            
-        except Exception as e:
-            return {"Raw Data": data, "Parse Error": str(e)}
-    
-    def is_multi_stack_barcode(self, barcode_obj, image):
-        """Detect if this is a multi-stack barcode"""
-        try:
-            if hasattr(barcode_obj, 'rect'):
-                x, y, w, h = barcode_obj.rect
-                
-                # Check if the barcode is unusually tall (indicating stacked format)
-                aspect_ratio = h / w if w > 0 else 0
-                
-                # DataBar Expanded and other stacked barcodes typically have aspect ratios > 0.3
-                return aspect_ratio > 0.3
-                
-        except:
-            pass
-        
+def find_misspell_boxes(img: Image.Image) -> List[Box]:
+    """Return a Box around every OCR'd word of *img* that the spell checker does not know.
+
+    Best effort: returns [] when OCR or spell-check support is missing, or when setup/OCR raises.
+    """
+    if not (HAS_OCR and HAS_SPELLCHECK):
+        return []
+    try:
+        spell = SpellChecker()
+        data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
+    except Exception:
+        return []
+    # Walk the parallel OCR columns in lockstep. zip() stops at the shortest column, so a
+    # missing or short column yields fewer boxes instead of an IndexError part-way through.
+    columns = [data.get(key, []) for key in ("text", "left", "top", "width", "height")]
+    boxes: List[Box] = []
+    for text, left, top, width, height in zip(*columns):
+        if not text:
+            continue
+        token = normalize_token(text)
+        if len(token) < 2 or token in spell:
+            continue
+        if width <= 0 or height <= 0:
+            continue
+        # Positional Box arguments: top, left, bottom, right, pixel area.
+        boxes.append(Box(top, left, top + height, left + width, width * height))
+    return boxes
+
+# -------------------- Barcode / QR -----------------
+def ean_like_checksum_ok(digits: str) -> bool:
+    if not digits.isdigit():
         return False
-    
-    def detect_stack_type(self, barcode_obj, image):
-        """Detect the type of multi-stack barcode"""
-        try:
-            if hasattr(barcode_obj, 'rect'):
-                x, y, w, h = barcode_obj.rect
-                aspect_ratio = h / w if w > 0 else 0
-                
-                # Classify based on aspect ratio and barcode type
-                if 'databar' in barcode_obj.type.lower():
-                    if aspect_ratio > 0.5:
-                        return "Quad Stack"
-                    elif aspect_ratio > 0.35:
-                        return "Triple Stack"
-                    elif aspect_ratio > 0.25:
-                        return "Double Stack"
-                    else:
-                        return "Single Stack"
-                else:
-                    # For other barcode types
-                    if aspect_ratio > 0.4:
-                        return "Multi-Stack"
-                    else:
-                        return "Single Stack"
-                        
-        except:
+    n = len(digits)
+    if n not in (8, 12, 13):
+        return True
+    nums = [int(c) for c in digits]
+    if n == 8:
+        body, check = nums[:7], nums[7]
+        s = sum(body[i] * (3 if i % 2 == 0 else 1) for i in range(7))
+        return (10 - (s % 10)) % 10 == check
+    if n == 12:
+        body, check = nums[:11], nums[11]
+        s = sum(body[i] * (3 if i % 2 == 0 else 1) for i in range(11))
+        return (10 - (s % 10)) % 10 == check
+    if n == 13:
+        body, check = nums[:12], nums[12]
+        s = sum(body[i] * (1 if i % 2 == 0 else 3) for i in range(12))
+        return (10 - (s % 10)) % 10 == check
+    return True
+
+def validate_symbology(symbology: str, data: bytes) -> bool:
+    """Return True when a decoded payload looks valid for its symbology.
+
+    EAN-13 / EAN-8 / UPC-A payloads must pass the check-digit test; every other
+    symbology (QR codes included) only needs a non-empty payload.
+    """
+    # Accept str payloads too (find_barcode_boxes_and_info does) instead of rejecting them.
+    text = data.decode('utf-8', errors='ignore') if isinstance(data, (bytes, bytearray)) else str(data or '')
+    if (symbology or '').upper() in ("EAN13","EAN-13","EAN8","EAN-8","UPCA","UPC-A"):
+        return ean_like_checksum_ok(re.sub(r"\D", "", text))
+    return len(text) > 0
+
+def boxes_from_rect(x: int, y: int, w: int, h: int) -> Box:
+    return Box(y, x, y + h, x + w, w * h)  # (left, top, width, height) -> Box(top, left, bottom, right, pixel area)
+
+def decode_with_variants(img: Image.Image):
+    if not HAS_BARCODE:
+        return []
+    results = []
+    def do_decode(pil_img):
+        try:
+            dec = zbar_decode(pil_img)
+            if dec: results.extend(dec)
+        except Exception:
             pass
-        
-        return "Unknown"
-    
-    def remove_duplicate_barcodes(self, barcodes):
-        """Remove duplicate barcodes based on position and data"""
-        unique_barcodes = []
-        seen_positions = set()
-        seen_data = set()
-        
-        for barcode in barcodes:
-            # Create position signature
-            pos_signature = f"{barcode['rect'].left},{barcode['rect'].top},{barcode['rect'].width},{barcode['rect'].height}"
-            data_signature = barcode['data']
-            
-            # Check if we've seen this position or data before
-            if pos_signature not in seen_positions and data_signature not in seen_data:
-                unique_barcodes.append(barcode)
-                seen_positions.add(pos_signature)
-                seen_data.add(data_signature)
-        
-        return unique_barcodes
-    
-    def enhance_barcode_data(self, barcodes):
-        """Enhance barcode data with additional analysis"""
-        enhanced_barcodes = []
-        
-        for barcode in barcodes:
-            # Add confidence score based on method and quality
-            confidence = self.calculate_confidence(barcode)
-            barcode['confidence'] = confidence
-            
-            # Add GS1 validation for DataBar
-            if 'databar' in barcode['type'].lower():
-                barcode['gs1_validated'] = self.validate_gs1_format(barcode['data'])
-            
-            enhanced_barcodes.append(barcode)
-        
-        return enhanced_barcodes
-    
-    def calculate_confidence(self, barcode):
-        """Calculate confidence score for barcode detection"""
-        confidence = 50  # Base confidence
-        
-        # Method confidence
-        method_scores = {
-            'pyzbar_basic': 70,
-            'pyzbar_enhanced': 70,
-            'dynamsoft': 85,  # Dynamsoft typically has higher accuracy
-            'enhanced_preprocessing_0': 65,
-            'enhanced_preprocessing_1': 60,
-            'enhanced_preprocessing_2': 55,
-            'transform_0deg': 60,
-            'transform_90deg': 50,
-            'transform_180deg': 50,
-            'transform_270deg': 50,
-            'small_barcode_detection': 75,
-            'high_res_2x': 70,
-            'high_res_3x': 65,
-            'high_res_4x': 60
-        }
-        
-        if barcode.get('method') in method_scores:
-            confidence += method_scores[barcode['method']]
-        
-        # Quality score
-        if barcode.get('quality', 0) > 0:
-            confidence += min(barcode['quality'], 20)
-        
-        # DataBar specific confidence
-        if 'databar' in barcode['type'].lower():
-            confidence += 10
-        
-        return min(confidence, 100)
-    
-    def validate_gs1_format(self, data):
-        """Validate GS1 format for DataBar data"""
-        try:
-            # Check for GS1 Application Identifiers
-            ai_pattern = r'\[(\d{2,4})\]'
-            matches = re.findall(ai_pattern, data)
-            
-            if matches:
-                return True
-            
-            # Check for parentheses format
-            ai_pattern_parens = r'\((\d{2,4})\)'
-            matches_parens = re.findall(ai_pattern_parens, data)
-            
-            return len(matches_parens) > 0
-            
-        except:
-            return False
-    
-    def check_spelling(self, text):
-        """
-        Robust EN/FR spell check:
-        - Unicode-aware tokens (keeps accents)
-        - Normalizes curly quotes/ligatures
-        - Heuristic per-token language (accented => FR; else EN)
-        - Flags if unknown in its likely language (not both)
+    do_decode(img)
+    if not results:      do_decode(img.convert('L'))
+    if not results:      do_decode(img.resize((img.width*2, img.height*2), Image.BICUBIC))
+    if not results and img.mode != 'RGB':
+        do_decode(img.convert('RGB'))
+    return results
+
+def find_barcode_boxes_and_info(img: Image.Image):
+    """Decode barcodes in *img*; return (boxes, infos) as parallel lists, one entry per decode."""
+    # NOTE(review): hits from decode_with_variants' 2x-upscaled retry presumably carry upscaled rects - confirm.
+    boxes: List[Box] = []
+    infos = []
+    for d in decode_with_variants(img):
+        r, raw = d.rect, d.data
+        payload = raw.decode('utf-8', errors='ignore') if isinstance(raw, (bytes, bytearray)) else str(raw)
+        boxes.append(boxes_from_rect(r.left, r.top, r.width, r.height))
+        infos.append({
+            'type': d.type, 'data': payload,
+            'left': r.left, 'top': r.top, 'width': r.width, 'height': r.height,
+            'valid': bool(validate_symbology(d.type, raw)),
+        })
+    return boxes, infos
+
+# -------------------- CMYK Panel -------------------
+def rgb_to_cmyk_array(img: Image.Image) -> np.ndarray:
+    return np.array(img.convert('CMYK'), dtype=np.float32)  # CMYK channels as float32, still on the 0..255 scale
+
+def avg_cmyk_in_box(cmyk_arr: np.ndarray, box: Box) -> Tuple[float,float,float,float]:
+    """Mean C, M, Y, K inside *box* (clipped to the array) as percentages rounded to 0.1; zeros if empty."""
+    top, bottom = max(0, box.y1), min(cmyk_arr.shape[0], box.y2)
+    left, right = max(0, box.x1), min(cmyk_arr.shape[1], box.x2)
+    if bottom <= top or right <= left:
+        return (0.0, 0.0, 0.0, 0.0)
+    channel_means = cmyk_arr[top:bottom, left:right, :].reshape(-1, 4).mean(axis=0)
+    return tuple(float(round(m * 100.0 / 255.0, 1)) for m in channel_means)
+
+def compute_cmyk_diffs(a_img: Image.Image, b_img: Image.Image, red_boxes: List[Box]):
+    """For each box, report mean CMYK % in A, in B, and B minus A; entries are numbered from 1."""
+    cmyk_a, cmyk_b = rgb_to_cmyk_array(a_img), rgb_to_cmyk_array(b_img)
+    entries = []
+    for number, region in enumerate(red_boxes, start=1):
+        in_a = avg_cmyk_in_box(cmyk_a, region)
+        in_b = avg_cmyk_in_box(cmyk_b, region)
+        change = tuple(round(after - before, 1) for before, after in zip(in_a, in_b))
+        entries.append({'idx': number, 'A': in_a, 'B': in_b, 'Delta': change})
+    return entries
+
+def draw_cmyk_panel(base: Image.Image, entries, title: str = 'CMYK breakdowns', panel_width: int = 260) -> Image.Image:
+    """Return a copy of *base* widened by a light-grey side panel that lists the CMYK *entries*."""
+    w, h = base.size
+    out = Image.new('RGB', (w + panel_width, h), (255,255,255))
+    out.paste(base, (0,0)); out.paste(Image.new('RGB', (panel_width, h), (245,245,245)), (w,0))
+    d, x0, y = ImageDraw.Draw(out), w + 8, 8
+    d.text((x0, y), title, fill=(0,0,0)); y += 18
+    if not entries:
+        d.text((x0, y), 'No differing regions', fill=(80,80,80))
+        return out
+    for e in entries:
+        rows = [(f"#{e['idx']}", (0,0,0), 14)]  # (text, ink, line advance)
+        for tag, key, ink, step in (('A', 'A', (0,0,0), 14), ('B', 'B', (0,0,0), 14), ('Δ', 'Delta', (120,0,0), 18)):
+            c, m, yl, k = e[key]
+            rows.append((f"{tag}: C {c}% M {m}% Y {yl}% K {k}%", ink, step))
+        for text, ink, step in rows:
+            d.text((x0, y), text, fill=ink); y += step
+        if y > h - 40: break  # remaining entries are left out once the text nears the bottom edge
+    return out
+
+# -------------------- Gradio Interface -----------------
+def compare_pdfs(file_a, file_b):
+    """Compare two uploaded PDFs and build every output shown by the UI.
+
+    Args:
+        file_a: Upload object for the first PDF, or None when nothing was
+            uploaded. Only its ``.name`` attribute is read and handed to
+            ``load_first_page`` as the path.
+        file_b: Same, for the second PDF.
+
+    Returns:
+        A 6-tuple ``(overlay, a_disp, b_disp, status, codes_a, codes_b)``:
+        the pixel-difference overlay, each page annotated with boxes plus a
+        CMYK side panel, a Markdown status string, and one table of barcode
+        rows per file. When an upload is missing or any step raises, the
+        three images are None, ``status`` carries the message and both
+        tables are empty lists.
+    """
+    try:
+        if file_a is None or file_b is None:
+            return None, None, None, "❌ Please upload both PDF files to compare", [], []
+
+        # Render both inputs at 300 dpi (load_first_page is defined elsewhere in this file)
+        a = load_first_page(file_a.name, dpi=300)
+        b = load_first_page(file_b.name, dpi=300)
+
+        # Match sizes so the two images can be compared pixel for pixel
+        a, b = match_sizes(a, b)
+
+        # Find differences with the fixed threshold / minimum-area settings
+        diff = difference_map(a, b)
+        red_boxes = find_diff_boxes(diff, threshold=12, min_area=25)
+
+        # Spell checking needs both optional features (OCR and the spell checker);
+        # without either one the misspelling lists stay empty
+        misspell_a = find_misspell_boxes(a) if HAS_OCR and HAS_SPELLCHECK else []
+        misspell_b = find_misspell_boxes(b) if HAS_OCR and HAS_SPELLCHECK else []
+
+        # Barcode detection is optional as well
+        if HAS_BARCODE:
+            bar_a, info_a = find_barcode_boxes_and_info(a)
+            bar_b, info_b = find_barcode_boxes_and_info(b)
+        else:
+            bar_a, info_a = [], []
+            bar_b, info_b = [], []
+
+        # Always enable CMYK analysis
+        cmyk_entries = compute_cmyk_diffs(a, b, red_boxes)
+        # Reuse the panel's entry numbers as the red-box labels
+        # (presumably drawn next to each box by draw_boxes_multi; confirm there)
+        labels = [e['idx'] for e in cmyk_entries]
+
+        # Create visualizations with default box width; the same difference boxes go on both pages
+        a_boxed_core = draw_boxes_multi(a, red_boxes, misspell_a, bar_a, width=3, red_labels=labels)
+        b_boxed_core = draw_boxes_multi(b, red_boxes, misspell_b, bar_b, width=3, red_labels=labels)
+
+        # Always show CMYK panel; both pages get the same entries and title
+        a_disp = draw_cmyk_panel(a_boxed_core, cmyk_entries, title='CMYK Analysis (A vs B)')
+        b_disp = draw_cmyk_panel(b_boxed_core, cmyk_entries, title='CMYK Analysis (A vs B)')
+
+        # Create pixel difference overlay
+        overlay = make_red_overlay(a, b)
+
+        # Create status message (Markdown)
+        status = f"""
+        📊 **Analysis Complete!**
+        - **Difference regions found:** {len(red_boxes)}
+        - **Misspellings detected:** A: {len(misspell_a)}, B: {len(misspell_b)}
+        - **Barcodes found:** A: {len(bar_a)}, B: {len(bar_b)}
+        - **Image dimensions:** {a.width} × {a.height} pixels
+
+        **Legend:**
+        - 🔴 Red boxes: Visual differences
+        - 🔵 Cyan boxes: Spelling errors
+        - 🟢 Green boxes: Barcodes/QR codes
         """
-        try:
-            # normalize ligatures & curly quotes
-            text = unicodedata.normalize("NFKC", text)
-            text = text.replace("'", "'").replace(""", '"').replace(""", '"')
 
-            # unicode letters with internal ' or - allowed
-            tokens = _re.findall(TOKEN_PATTERN, text, flags=_re.UNICODE if _USE_REGEX else 0)
+        # Prepare barcode data for tables: one row per code, ordered
+        # type, data, left, top, width, height, valid; missing keys fall back to defaults
+        codes_a = [[c.get('type',''), c.get('data',''), c.get('left',0), c.get('top',0),
+                   c.get('width',0), c.get('height',0), c.get('valid', False)] for c in info_a]
+        codes_b = [[c.get('type',''), c.get('data',''), c.get('left',0), c.get('top',0),
+                   c.get('width',0), c.get('height',0), c.get('valid', False)] for c in info_b]
 
-            issues = []
-            for raw in tokens:
-                t = raw.lower()
+        return overlay, a_disp, b_disp, status, codes_a, codes_b
 
-                # skip very short, short ALL-CAPS acronyms, and whitelisted terms
-                if len(t) < 3:
-                    continue
-                if raw.isupper() and len(raw) <= 3:
-                    continue
-                if t in DOMAIN_WHITELIST:
-                    continue
+    # Catch-all boundary: any failure above is reported through the status
+    # string instead of being raised to the caller
+    except Exception as e:
+        error_msg = f"❌ **Error:** {str(e)}"
+        return None, None, None, error_msg, [], []
 
-                miss_en = t in self.english_spellchecker.unknown([t])
-                miss_fr = t in self.french_spellchecker.unknown([t])
+# -------------------- Gradio App -------------------
+def create_demo():
+    with gr.Blocks(title="PDF Comparison Tool", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("""
+        # 🔍 Advanced PDF Comparison Tool
 
-                use_fr = _likely_french(raw)
+        Upload two PDF files to get comprehensive analysis including:
+        - **Visual differences** with bounding boxes
+        - **OCR and spell checking**
+        - **Barcode/QR code detection**
+        - **CMYK color analysis**
+        """)
 
-                # Prefer the likely language, but fall back to "either language unknown"
-                if (use_fr and miss_fr) or ((not use_fr) and miss_en) or (miss_en and miss_fr):
-                    issues.append({
-                        "word": raw,
-                        "lang": "fr" if use_fr else "en",
-                        "suggestions_en": list(self.english_spellchecker.candidates(t))[:3],
-                        "suggestions_fr": list(self.french_spellchecker.candidates(t))[:3],
-                    })
+        with gr.Row():
+            with gr.Column():
+                file_a = gr.File(label="📄 PDF A (Reference)", file_types=[".pdf"])
+                file_b = gr.File(label="📄 PDF B (Comparison)", file_types=[".pdf"])
 
-            return issues
-        except Exception as e:
-            print(f"Error checking spelling: {e}")
-            return []
-    
-    def compare_colors(self, image1, image2):
-        """Compare colors between two images and return differences using RGB color space"""
-        try:
-            print("Starting RGB color comparison...")
-            
-            # Convert images to same size
-            img1 = np.array(image1)
-            img2 = np.array(image2)
-            
-            print(f"Image 1 shape: {img1.shape}")
-            print(f"Image 2 shape: {img2.shape}")
-            
-            # Resize images to same dimensions
-            height = min(img1.shape[0], img2.shape[0])
-            width = min(img1.shape[1], img2.shape[1])
-            
-            img1_resized = cv2.resize(img1, (width, height))
-            img2_resized = cv2.resize(img2, (width, height))
-            
-            print(f"Resized to: {width}x{height}")
-            
-            # Keep images in RGB format (no conversion to BGR)
-            img1_rgb = img1_resized
-            img2_rgb = img2_resized
-            
-            color_differences = []
-            
-            # Method 1: Enhanced RGB channel comparison with 20% more accuracy
-            print("Method 1: Enhanced RGB channel comparison")
-            
-            # Calculate absolute difference for each RGB channel with enhanced precision
-            diff_r = cv2.absdiff(img1_rgb[:,:,0], img2_rgb[:,:,0])  # Red channel
-            diff_g = cv2.absdiff(img1_rgb[:,:,1], img2_rgb[:,:,1])  # Green channel
-            diff_b = cv2.absdiff(img1_rgb[:,:,2], img2_rgb[:,:,2])  # Blue channel
-            
-            # Enhanced RGB combination with better weighting
-            diff_combined = cv2.addWeighted(diff_r, 0.4, diff_g, 0.4, 0)  # Red and Green weighted higher
-            diff_combined = cv2.addWeighted(diff_combined, 1.0, diff_b, 0.2, 0)  # Blue weighted lower
-            
-            # Apply Gaussian blur to reduce noise and improve accuracy
-            diff_combined = cv2.GaussianBlur(diff_combined, (3, 3), 0)
-            
-            # Apply balanced thresholds to catch color variations while avoiding multiple boxes
-            rgb_thresholds = [15, 22, 30, 40]  # Balanced thresholds
-            
-            for threshold in rgb_thresholds:
-                _, thresh = cv2.threshold(diff_combined, threshold, 255, cv2.THRESH_BINARY)
-                
-                # Apply minimal morphological operations
-                kernel = np.ones((1, 1), np.uint8)  # Minimal kernel to preserve detail
-                thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
-                thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
-                
-                # Find contours
-                contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-                
-                print(f"RGB Threshold {threshold}: Found {len(contours)} contours")
-                
-                for contour in contours:
-                    area = cv2.contourArea(contour)
-                    if area > 15:  # Balanced area threshold to catch variations while avoiding small boxes
-                        x, y, w, h = cv2.boundingRect(contour)
-                        
-                        # Get the actual RGB colors at this location
-                        color1 = img1_rgb[y:y+h, x:x+w].mean(axis=(0, 1))
-                        color2 = img2_rgb[y:y+h, x:x+w].mean(axis=(0, 1))
-                        
-                        # Calculate RGB color difference magnitude
-                        color_diff = np.linalg.norm(color1 - color2)
-                        
-                        # Flag moderate color differences
-                        if color_diff > 18:  # Balanced threshold
-                            # Check if this area is already covered (refined consolidated problem areas)
-                            already_covered = False
-                            for existing_diff in color_differences:
-                                if (abs(existing_diff['x'] - x) < 21 and 
-                                    abs(existing_diff['y'] - y) < 21 and
-                                    abs(existing_diff['width'] - w) < 21 and
-                                    abs(existing_diff['height'] - h) < 21):
-                                    already_covered = True
-                                    break
-                            
-                            if not already_covered:
-                                color_differences.append({
-                                    'x': x,
-                                    'y': y,
-                                    'width': w,
-                                    'height': h,
-                                    'area': area,
-                                    'color1': color1.tolist(),
-                                    'color2': color2.tolist(),
-                                    'threshold': f"RGB_{threshold}",
-                                    'color_diff': color_diff,
-                                    'diff_r': float(abs(color1[0] - color2[0])),
-                                    'diff_g': float(abs(color1[1] - color2[1])),
-                                    'diff_b': float(abs(color1[2] - color2[2]))
-                                })
-            
-            # Method 2: Enhanced HSV color space comparison with 20% more accuracy
-            print("Method 2: Enhanced HSV color space comparison")
-            
-            # Convert to HSV for better color difference detection
-            img1_hsv = cv2.cvtColor(img1_rgb, cv2.COLOR_RGB2HSV)
-            img2_hsv = cv2.cvtColor(img2_rgb, cv2.COLOR_RGB2HSV)
-            
-            # Enhanced HSV comparison with better channel weighting
-            hue_diff = cv2.absdiff(img1_hsv[:,:,0], img2_hsv[:,:,0])  # Hue channel
-            sat_diff = cv2.absdiff(img1_hsv[:,:,1], img2_hsv[:,:,1])  # Saturation channel
-            val_diff = cv2.absdiff(img1_hsv[:,:,2], img2_hsv[:,:,2])  # Value channel
-            
-            # Enhanced HSV combination with better weighting
-            hsv_combined = cv2.addWeighted(hue_diff, 0.5, sat_diff, 0.3, 0)  # Hue and Saturation
-            hsv_combined = cv2.addWeighted(hsv_combined, 1.0, val_diff, 0.2, 0)  # Add Value channel
-            
-            # Apply Gaussian blur to reduce noise and improve accuracy
-            hsv_combined = cv2.GaussianBlur(hsv_combined, (3, 3), 0)
-            
-            # Apply balanced HSV thresholds to catch color variations while avoiding multiple boxes
-            hsv_thresholds = [18, 25, 35, 45]  # Balanced HSV thresholds
-            
-            for threshold in hsv_thresholds:
-                _, hsv_thresh = cv2.threshold(hsv_combined, threshold, 255, cv2.THRESH_BINARY)
-                
-                # Apply minimal morphological operations
-                kernel = np.ones((1, 1), np.uint8)
-                hsv_thresh = cv2.morphologyEx(hsv_thresh, cv2.MORPH_CLOSE, kernel)
-                hsv_thresh = cv2.morphologyEx(hsv_thresh, cv2.MORPH_OPEN, kernel)
-                
-                # Find contours
-                hsv_contours, _ = cv2.findContours(hsv_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-                
-                print(f"HSV Threshold {threshold}: Found {len(hsv_contours)} contours")
-                
-                for contour in hsv_contours:
-                    area = cv2.contourArea(contour)
-                    if area > 15:  # Balanced area threshold to catch variations while avoiding small boxes
-                        x, y, w, h = cv2.boundingRect(contour)
-                        
-                        # Get the actual colors at this location
-                        color1 = img1_rgb[y:y+h, x:x+w].mean(axis=(0, 1))
-                        color2 = img2_rgb[y:y+h, x:x+w].mean(axis=(0, 1))
-                        
-                        # Calculate color difference magnitude
-                        color_diff = np.linalg.norm(color1 - color2)
-                        
-                        # Flag moderate color differences
-                        if color_diff > 22:  # Balanced threshold
-                            # Check if this area is already covered (refined consolidated problem areas)
-                            already_covered = False
-                            for existing_diff in color_differences:
-                                if (abs(existing_diff['x'] - x) < 21 and 
-                                    abs(existing_diff['y'] - y) < 21 and
-                                    abs(existing_diff['width'] - w) < 21 and
-                                    abs(existing_diff['height'] - h) < 21):
-                                    already_covered = True
-                                    break
-                            
-                            if not already_covered:
-                                color_differences.append({
-                                    'x': x,
-                                    'y': y,
-                                    'width': w,
-                                    'height': h,
-                                    'area': area,
-                                    'color1': color1.tolist(),
-                                    'color2': color2.tolist(),
-                                    'threshold': f"HSV_{threshold}",
-                                    'color_diff': color_diff,
-                                    'diff_r': float(abs(color1[0] - color2[0])),
-                                    'diff_g': float(abs(color1[1] - color2[1])),
-                                    'diff_b': float(abs(color1[2] - color2[2]))
-                                })
-            
-            # Method 3: Enhanced pixel-by-pixel RGB comparison with 20% more accuracy
-            print("Method 3: Enhanced pixel-by-pixel RGB comparison")
-            
-            # Sample every 12th pixel for less sensitivity (20% less frequent)
-            for y in range(0, height, 12):
-                for x in range(0, width, 12):
-                    color1 = img1_rgb[y, x]
-                    color2 = img2_rgb[y, x]
-                    
-                    # Calculate absolute difference for each RGB channel
-                    diff_r = abs(int(color1[0]) - int(color2[0]))  # Red channel
-                    diff_g = abs(int(color1[1]) - int(color2[1]))  # Green channel
-                    diff_b = abs(int(color1[2]) - int(color2[2]))  # Blue channel
-                    
-                                        # Flag if RGB channels differ by moderate amounts
-                    if diff_r > 10 or diff_g > 10 or diff_b > 10:
-                        # Check if this area is already covered (refined consolidated problem areas)
-                        already_covered = False
-                        for existing_diff in color_differences:
-                            if (abs(existing_diff['x'] - x) < 21 and 
-                                abs(existing_diff['y'] - y) < 21):
-                                already_covered = True
-                                break
-                        
-                        if not already_covered:
-                            color_differences.append({
-                                'x': x,
-                                'y': y,
-                                'width': 5,  # Small box around the pixel
-                                'height': 5,
-                                'area': 25,
-                                'color1': color1.tolist(),
-                                'color2': color2.tolist(),
-                                'threshold': 'pixel_RGB',
-                                'color_diff': diff_r + diff_g + diff_b,
-                                'diff_r': diff_r,
-                                'diff_g': diff_g,
-                                'diff_b': diff_b
-                            })
-            
-            print(f"RGB color comparison completed. Found {len(color_differences)} total differences.")
-            
-            # Method 4: LAB color space comparison for perceptual accuracy (20% more accurate)
-            print("Method 4: LAB color space comparison")
-            
-            # Convert to LAB color space for perceptual color differences
-            img1_lab = cv2.cvtColor(img1_rgb, cv2.COLOR_RGB2LAB)
-            img2_lab = cv2.cvtColor(img2_rgb, cv2.COLOR_RGB2LAB)
-            
-            # Calculate LAB differences (perceptually uniform)
-            lab_diff_l = cv2.absdiff(img1_lab[:,:,0], img2_lab[:,:,0])  # L channel (lightness)
-            lab_diff_a = cv2.absdiff(img1_lab[:,:,1], img2_lab[:,:,1])  # a channel (green-red)
-            lab_diff_b = cv2.absdiff(img1_lab[:,:,2], img2_lab[:,:,2])  # b channel (blue-yellow)
-            
-            # Combine LAB differences with perceptual weighting
-            lab_combined = cv2.addWeighted(lab_diff_l, 0.3, lab_diff_a, 0.35, 0)  # L and a channels
-            lab_combined = cv2.addWeighted(lab_combined, 1.0, lab_diff_b, 0.35, 0)  # Add b channel
-            
-            # Apply Gaussian blur for noise reduction
-            lab_combined = cv2.GaussianBlur(lab_combined, (3, 3), 0)
-            
-            # Apply balanced LAB thresholds to catch color variations while avoiding multiple boxes
-            lab_thresholds = [20, 28, 38, 50]  # Balanced LAB thresholds
-            
-            for threshold in lab_thresholds:
-                _, lab_thresh = cv2.threshold(lab_combined, threshold, 255, cv2.THRESH_BINARY)
-                
-                # Apply morphological operations
-                kernel = np.ones((1, 1), np.uint8)
-                lab_thresh = cv2.morphologyEx(lab_thresh, cv2.MORPH_CLOSE, kernel)
-                lab_thresh = cv2.morphologyEx(lab_thresh, cv2.MORPH_OPEN, kernel)
-                
-                # Find contours
-                lab_contours, _ = cv2.findContours(lab_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-                
-                print(f"LAB Threshold {threshold}: Found {len(lab_contours)} contours")
-                
-                for contour in lab_contours:
-                    area = cv2.contourArea(contour)
-                    if area > 15:  # Balanced area threshold to catch variations while avoiding small boxes
-                        x, y, w, h = cv2.boundingRect(contour)
-                        
-                        # Get the actual colors at this location
-                        color1 = img1_rgb[y:y+h, x:x+w].mean(axis=(0, 1))
-                        color2 = img2_rgb[y:y+h, x:x+w].mean(axis=(0, 1))
-                        
-                        # Calculate color difference magnitude
-                        color_diff = np.linalg.norm(color1 - color2)
-                        
-                        # Flag moderate color differences
-                        if color_diff > 22:  # Balanced threshold
-                            # Check if this area is already covered (refined consolidated problem areas)
-                            already_covered = False
-                            for existing_diff in color_differences:
-                                if (abs(existing_diff['x'] - x) < 21 and 
-                                    abs(existing_diff['y'] - y) < 21 and
-                                    abs(existing_diff['width'] - w) < 21 and
-                                    abs(existing_diff['height'] - h) < 21):
-                                    already_covered = True
-                                    break
-                            
-                            if not already_covered:
-                                color_differences.append({
-                                    'x': x,
-                                    'y': y,
-                                    'width': w,
-                                    'height': h,
-                                    'area': area,
-                                    'color1': color1.tolist(),
-                                    'color2': color2.tolist(),
-                                    'threshold': f"LAB_{threshold}",
-                                    'color_diff': color_diff,
-                                    'diff_r': float(abs(color1[0] - color2[0])),
-                                    'diff_g': float(abs(color1[1] - color2[1])),
-                                    'diff_b': float(abs(color1[2] - color2[2]))
-                                })
-            
-            print(f"Enhanced color comparison completed. Found {len(color_differences)} total differences.")
-            
-            # Group nearby differences into one perimeter box per issue area
-            if color_differences:
-                grouped_differences = self.group_nearby_differences(color_differences)
-                print(f"Grouped into {len(grouped_differences)} perimeter boxes")
-                return grouped_differences
-            
-            return color_differences
-            
-        except Exception as e:
-            print(f"Error comparing colors: {str(e)}")
-            return []
-    
-    def group_nearby_differences(self, differences):
-        """Group nearby differences into larger bounding boxes around affected areas"""
-        if not differences:
-            return []
-        
-        # Sort differences by position for easier grouping
-        sorted_diffs = sorted(differences, key=lambda x: (x['y'], x['x']))
-        
-        grouped_areas = []
-        current_group = []
-        
-        for diff in sorted_diffs:
-            if not current_group:
-                current_group = [diff]
-            else:
-                # Check if this difference is close to the current group
-                should_group = False
-                for group_diff in current_group:
-                    # Calculate distance between centers
-                    center1_x = group_diff['x'] + group_diff['width'] // 2
-                    center1_y = group_diff['y'] + group_diff['height'] // 2
-                    center2_x = diff['x'] + diff['width'] // 2
-                    center2_y = diff['y'] + diff['height'] // 2
-                    
-                    distance = ((center1_x - center2_x) ** 2 + (center1_y - center2_y) ** 2) ** 0.5
-                    
-                    # If distance is less than 200 pixels, group them for one box per main issue
-                    if distance < 200:
-                        should_group = True
-                        break
-                
-                if should_group:
-                    current_group.append(diff)
-                else:
-                    # Create bounding box for current group
-                    if current_group:
-                        bounding_box = self.create_group_bounding_box(current_group)
-                        if bounding_box:  # Only add if not None
-                            grouped_areas.append(bounding_box)
-                    current_group = [diff]
-        
-        # Don't forget the last group
-        if current_group:
-            bounding_box = self.create_group_bounding_box(current_group)
-            if bounding_box:  # Only add if not None
-                grouped_areas.append(bounding_box)
-        
-        return grouped_areas
-    
-    def group_nearby_differences(self, differences):
-        """Group nearby differences into one perimeter box per issue area"""
-        if not differences:
-            return []
-        
-        # Sort differences by position for easier grouping
-        sorted_diffs = sorted(differences, key=lambda x: (x['y'], x['x']))
-        
-        grouped_areas = []
-        current_group = []
-        
-        for diff in sorted_diffs:
-            if not current_group:
-                current_group = [diff]
-            else:
-                # Check if this difference is close to the current group
-                should_group = False
-                for group_diff in current_group:
-                    # Calculate distance between centers
-                    center1_x = group_diff['x'] + group_diff['width'] // 2
-                    center1_y = group_diff['y'] + group_diff['height'] // 2
-                    center2_x = diff['x'] + diff['width'] // 2
-                    center2_y = diff['y'] + diff['height'] // 2
-                    
-                    distance = ((center1_x - center2_x) ** 2 + (center1_y - center2_y) ** 2) ** 0.5
-                    
-                    # If distance is less than 234 pixels, group them for refined consolidated problem areas
-                    if distance < 234:
-                        should_group = True
-                        break
-                
-                if should_group:
-                    current_group.append(diff)
-                else:
-                    # Create perimeter box for current group
-                    if current_group:
-                        perimeter_box = self.create_perimeter_box(current_group)
-                        if perimeter_box:  # Only add if not None
-                            grouped_areas.append(perimeter_box)
-                    current_group = [diff]
-        
-        # Don't forget the last group
-        if current_group:
-            perimeter_box = self.create_perimeter_box(current_group)
-            if perimeter_box:  # Only add if not None
-                grouped_areas.append(perimeter_box)
-        
-        return grouped_areas
-    
-    def create_perimeter_box(self, group):
-        """Create a perimeter box that encompasses all differences in a group"""
-        if not group:
-            return None
-        
-        # Find the overall bounding box
-        min_x = min(diff['x'] - 5 for diff in group)  # Include 5-pixel extension
-        min_y = min(diff['y'] - 5 for diff in group)  # Include 5-pixel extension
-        max_x = max(diff['x'] + diff['width'] + 5 for diff in group)  # Include 5-pixel extension
-        max_y = max(diff['y'] + diff['height'] + 5 for diff in group)  # Include 5-pixel extension
-        
-        # Add minimal padding around the perimeter box (refined consolidated problem areas)
-        padding = 7
-        min_x = max(0, min_x - padding)
-        min_y = max(0, min_y - padding)
-        max_x = max_x + padding
-        max_y = max_y + padding
-        
-        # Calculate final dimensions
-        width = max_x - min_x
-        height = max_y - min_y
-        
-        # Filter out very small groups (refined consolidated problem areas)
-        if width < 26 or height < 26:
-            return None
-            
-        return {
-            'x': min_x,
-            'y': min_y,
-            'width': width,
-            'height': height,
-            'area': width * height,
-            'color1': [0, 0, 0],  # Placeholder
-            'color2': [0, 0, 0],  # Placeholder
-            'threshold': 'perimeter',
-            'color_diff': 1.0,
-            'num_original_differences': len(group)
-        }
-    
-    def create_annotated_image(self, image, differences, output_path):
-        """Create annotated image with red boxes around differences"""
-        try:
-            print(f"Creating annotated image: {output_path}")
-            print(f"Number of differences to annotate: {len(differences)}")
-            
-            # Create a copy of the image
-            annotated_image = image.copy()
-            draw = ImageDraw.Draw(annotated_image)
-            
-            # Draw red rectangles around differences
-            for i, diff in enumerate(differences):
-                x, y, w, h = diff['x'], diff['y'], diff['width'], diff['height']
-                
-                # Draw thicker red rectangle
-                draw.rectangle([x, y, x + w, y + h], outline='red', width=5)
-                
-                print(f"Drawing rectangle {i+1}: ({x}, {y}) to ({x+w}, {y+h})")
-            
-            # Save annotated image
-            annotated_image.save(output_path)
-            print(f"Annotated image saved successfully: {output_path}")
-            
-        except Exception as e:
-            print(f"Error creating annotated image: {str(e)}")
-            # Try to save the original image as fallback
-            try:
-                image.save(output_path)
-                print(f"Saved original image as fallback: {output_path}")
-            except Exception as e2:
-                print(f"Failed to save fallback image: {str(e2)}")
-    
-    def compare_pdfs(self, pdf1_path, pdf2_path, session_id):
-        """Main comparison function with improved error handling"""
-        try:
-            print("Starting PDF comparison...")
-            start_time = time.time()
-            
-            # Validate both PDFs contain "50 Carroll"
-            print("Validating PDF 1...")
-            if not self.validate_pdf(pdf1_path):
-                raise Exception("INVALID DOCUMENT")
-            
-            print("Validating PDF 2...")
-            if not self.validate_pdf(pdf2_path):
-                raise Exception("INVALID DOCUMENT")
-            
-            # Extract text and images from both PDFs
-            print("Extracting text from PDF 1...")
-            pdf1_data = self.extract_text_from_pdf(pdf1_path)
-            if not pdf1_data:
-                raise Exception("INVALID DOCUMENT")
-            
-            print("Extracting text from PDF 2...")
-            pdf2_data = self.extract_text_from_pdf(pdf2_path)
-            if not pdf2_data:
-                raise Exception("INVALID DOCUMENT")
-            
-            # Initialize results
-            results = {
-                'session_id': session_id,
-                'validation': {
-                    'pdf1_valid': True,
-                    'pdf2_valid': True,
-                    'validation_text': '50 Carroll'
-                },
-                'text_comparison': [],
-                'spelling_issues': [],
-                'barcodes_qr_codes': [],
-                'color_differences': [],
-                'annotated_images': []
-            }
-            
-            # Compare text and check spelling
-            print("Processing pages...")
-            for i, (page1, page2) in enumerate(zip(pdf1_data, pdf2_data)):
-                print(f"Processing page {i + 1}...")
-                page_results = {
-                    'page': i + 1,
-                    'text_differences': [],
-                    'spelling_issues_pdf1': [],
-                    'spelling_issues_pdf2': [],
-                    'barcodes_pdf1': [],
-                    'barcodes_pdf2': [],
-                    'color_differences': []
-                }
-                
-                # Check spelling for both PDFs
-                print(f"Checking spelling for page {i + 1}...")
-                page_results['spelling_issues_pdf1'] = self.check_spelling(page1['text'])
-                page_results['spelling_issues_pdf2'] = self.check_spelling(page2['text'])
-                
-                # Add spelling issues to text differences for UI visibility
-                if page_results['spelling_issues_pdf1'] or page_results['spelling_issues_pdf2']:
-                    page_results['text_differences'].append({
-                        "type": "spelling",
-                        "pdf1": [i["word"] for i in page_results['spelling_issues_pdf1']],
-                        "pdf2": [i["word"] for i in page_results['spelling_issues_pdf2']],
-                    })
-                
-                # Create spelling-only annotated images (one box per error)
-                spell_dir = f'static/results/{session_id}'
-                os.makedirs(spell_dir, exist_ok=True)
+        compare_btn = gr.Button("🔍 Compare PDF Files", variant="primary", size="lg")
+
+        status_md = gr.Markdown("")
+
+        with gr.Row():
+            overlay_img = gr.Image(label="🔴 Pixel Differences (Red = Different)", type="pil")
+
+        with gr.Row():
+            img_a = gr.Image(label="📄 File A with Analysis", type="pil")
+            img_b = gr.Image(label="📄 File B with Analysis", type="pil")
+
+        gr.Markdown("### 📊 Barcode Detection Results")
+        with gr.Row():
+            codes_a_df = gr.Dataframe(
+                headers=["Type", "Data", "Left", "Top", "Width", "Height", "Valid"],
+                label="Barcodes in File A",
+                interactive=False
+            )
+            codes_b_df = gr.Dataframe(
+                headers=["Type", "Data", "Left", "Top", "Width", "Height", "Valid"],
+                label="Barcodes in File B",
+                interactive=False
+            )
+
+        # Event handlers
+        compare_btn.click(
+            fn=compare_pdfs,
+            inputs=[file_a, file_b],
+            outputs=[overlay_img, img_a, img_b, status_md, codes_a_df, codes_b_df]
+        )
+
+        gr.Markdown("""
+        ### 📝 Instructions:
+        1. Upload two PDF files
+        2. Click "Compare PDF Files"
+        3. View results with comprehensive analysis
 
-                spell_img1 = page1['image'].copy()
-                spell_img2 = page2['image'].copy()
-                spell_img1 = self.annotate_spelling_errors_on_image(spell_img1, page_results['spelling_issues_pdf1'])
-                spell_img2 = self.annotate_spelling_errors_on_image(spell_img2, page_results['spelling_issues_pdf2'])
+        ### 🎨 Color Legend:
+        - **🔴 Red boxes:** Visual differences between files
+        - **🔵 Cyan boxes:** Potential spelling errors (OCR)
+        - **🟢 Green boxes:** Detected barcodes/QR codes
+        - **📊 Side panel:** CMYK color analysis for print workflows
+        """)
 
-                spell_path1 = f'{spell_dir}/page_{i+1}_pdf1_spelling.png'
-                spell_path2 = f'{spell_dir}/page_{i+1}_pdf2_spelling.png'
-                spell_img1.save(spell_path1)
-                spell_img2.save(spell_path2)
+    return demo
 
-                # link them into the results for your UI
-                page_results.setdefault('annotated_images', {})
-                page_results['annotated_images'].update({
-                    'pdf1_spelling': f'results/{session_id}/page_{i+1}_pdf1_spelling.png',
-                    'pdf2_spelling': f'results/{session_id}/page_{i+1}_pdf2_spelling.png',
-                })
-                
-                # Detect barcodes and QR codes
-                print(f"Detecting barcodes for page {i + 1} PDF 1...")
-                page_results['barcodes_pdf1'] = self.detect_barcodes_qr_codes(page1['image']) or []
-                
-                print(f"Detecting barcodes for page {i + 1} PDF 2...")
-                page_results['barcodes_pdf2'] = self.detect_barcodes_qr_codes(page2['image']) or []
-                
-                # Compare colors
-                print(f"Comparing colors for page {i + 1}...")
-                color_diffs = self.compare_colors(page1['image'], page2['image'])
-                page_results['color_differences'] = color_diffs
-                
-                # Create annotated images and save original images
-                print(f"Creating images for page {i + 1}...")
-                output_dir = f'static/results/{session_id}'
-                os.makedirs(output_dir, exist_ok=True)
-                
-                # Save original images
-                original_path1 = f'{output_dir}/page_{i+1}_pdf1_original.png'
-                original_path2 = f'{output_dir}/page_{i+1}_pdf2_original.png'
-                
-                page1['image'].save(original_path1)
-                page2['image'].save(original_path2)
-                
-                # Create annotated images if there are color differences
-                if color_diffs:
-                    print(f"Creating annotated images for page {i + 1}...")
-                    annotated_path1 = f'{output_dir}/page_{i+1}_pdf1_annotated.png'
-                    annotated_path2 = f'{output_dir}/page_{i+1}_pdf2_annotated.png'
-                    
-                    self.create_annotated_image(page1['image'], color_diffs, annotated_path1)
-                    self.create_annotated_image(page2['image'], color_diffs, annotated_path2)
-                    
-                    page_results['annotated_images'] = {
-                        'pdf1': f'results/{session_id}/page_{i+1}_pdf1_annotated.png',
-                        'pdf2': f'results/{session_id}/page_{i+1}_pdf2_annotated.png'
-                    }
-                else:
-                    # If no color differences, use original images
-                    page_results['annotated_images'] = {
-                        'pdf1': f'results/{session_id}/page_{i+1}_pdf1_original.png',
-                        'pdf2': f'results/{session_id}/page_{i+1}_pdf2_original.png'
-                    }
-                
-                results['text_comparison'].append(page_results)
-            
-            # Aggregate spelling issues
-            print("Aggregating results...")
-            all_spelling_issues = []
-            for page in results['text_comparison']:
-                all_spelling_issues.extend(page['spelling_issues_pdf1'])
-                all_spelling_issues.extend(page['spelling_issues_pdf2'])
-            
-            results['spelling_issues'] = all_spelling_issues
-            
-            # Aggregate barcodes and QR codes
-            all_barcodes = []
-            for page in results['text_comparison']:
-                all_barcodes.extend(page['barcodes_pdf1'])
-                all_barcodes.extend(page['barcodes_pdf2'])
-            
-            results['barcodes_qr_codes'] = all_barcodes
-            
-            elapsed_time = time.time() - start_time
-            print(f"PDF comparison completed in {elapsed_time:.2f} seconds.")
-            
-            return results
-            
-        except Exception as e:
-            print(f"Error in PDF comparison: {str(e)}")
-            raise Exception(f"INVALID DOCUMENT")
-# Enhanced OCR for tiny fonts - deployment check
-# Force rebuild - Thu Sep  4 09:33:44 EDT 2025
+if __name__ == "__main__":
+    demo = create_demo()
+    demo.launch(
+        server_name="0.0.0.0",  # Bind to all interfaces to allow external access
+        share=True,  # Enabled here, so a public link is created; set to False to disable
+        show_error=True
+    )