diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,11 +1,16 @@
-"""Smart Parchi OCR Enterprise — Hugging Face Space backend (CPU-first monolith) v6.0.
-Upgrade notes (v6.0):
-  • Primary engine: Qwen2-VL-2B-Instruct via transformers (fp32 CPU, ~8 GB RAM).
-  • bitsandbytes REMOVED — GPU-only lib; replaced with plain torch CPU build.
-  • Concurrency guard: asyncio.Semaphore(2) prevents OOM on simultaneous requests.
-  • Image-hash LRU cache: up to 100 results, 1-hour TTL.
-  • Explicit gc.collect() after every request to prevent memory bloat.
-  • EasyOCR + PaddleOCR kept as fallback ensemble (unchanged).
+"""
+Parchi OCR – Minimal CPU-Optimised FastAPI Backend
+====================================================
+• EasyOCR  ['ur', 'en']  – single reader, warm on startup
+• Pre-processing : CLAHE → denoise → sharpen → adaptive-threshold
+• Multi-variant  : original + inverted + high-contrast → merge
+• Geometry line grouping  (Y-centre clustering, no column assumption)
+• Number post-processor   (O→0, l→1, spaced digits merged)
+• Generic item parser     (any text+number pattern)
+• Bottom-ROI total pass   (crop 25 %, digit whitelist re-OCR)
+• Lexicon correction      (30 common Pakistani grocery/shop items)
+• SHA-256 LRU cache       (24 h TTL, 500 entries max)
+• ZDR compliance          (no persistent storage, metadata logs only)
 """
 
 from __future__ import annotations
@@ -14,3982 +19,770 @@ import asyncio
 import gc
 import hashlib
 import io
-import itertools
-import json
 import logging
 import math
-import os
-import pickle
-import random
 import re
-import sys
 import threading
 import time
-import warnings
-from collections import defaultdict
-from contextlib import asynccontextmanager, redirect_stdout, redirect_stderr
-from dataclasses import dataclass, field
-from datetime import datetime
-from enum import Enum
-from functools import lru_cache, wraps
-from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
-from typing_extensions import TypedDict
-
-# ============================================================================
-# ENVIRONMENT OPTIMIZATION (CRITICAL - SET BEFORE IMPORTS)
-# ============================================================================
-# Suppress warnings for cleaner logs
-warnings.filterwarnings("ignore")
-
-# HF CPU stability defaults (can be overridden by Space env variables).
-os.environ.setdefault("OMP_NUM_THREADS", "1")
-
-# Suppress progress bars from external libraries (tqdm, huggingface_hub, etc.)
-os.environ["DISABLE_TQDM"] = "1"
-os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
-os.environ["PADDLE_DOWNLOAD_CACHE"] = "/.cache/paddlepaddle"
-os.environ["PADDLE_HOME"] = "/.cache/paddlepaddle"
-
-# PaddleOCR optimizations for CPU
-os.environ["PADDLE_INFERENCE_MODEL_CACHE"] = "/.cache/paddlepaddle/models"
-os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "0"
-
-# Suppress library-level logging initially
-logging.getLogger("urllib3").setLevel(logging.ERROR)
-logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
-logging.getLogger("paddleocr").setLevel(logging.CRITICAL)
-logging.getLogger("paddlepaddle").setLevel(logging.CRITICAL)
-
-# ============================================================================
-# CORE DEPENDENCIES
-# ============================================================================
+from collections import OrderedDict
+from datetime import datetime, timezone
+from typing import Any
+
 import cv2
+import easyocr
 import numpy as np
-from PIL import Image, ImageEnhance, ImageFilter
-from fastapi import FastAPI, File, HTTPException, UploadFile, BackgroundTasks
+import uvicorn
+from fastapi import FastAPI, File, HTTPException, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
-from rapidfuzz import process as fuzzy_process
-from pydantic import BaseModel, Field, validator
-from sklearn.cluster import DBSCAN
-from sklearn.metrics.pairwise import cosine_similarity
-
-# OCR Engines (EasyOCR optional — HF builds may omit it; Paddle still works)
-try:
-    # Suppress EasyOCR startup spam
-    with redirect_stdout(open(os.devnull, 'w')), redirect_stderr(open(os.devnull, 'w')):
-        import easyocr
-    EASYOCR_AVAILABLE = True
-except Exception as e:
-    easyocr = None  # type: ignore
-    EASYOCR_AVAILABLE = False
-
-try:
-    # Suppress PaddleOCR startup spam
-    with redirect_stdout(open(os.devnull, 'w')), redirect_stderr(open(os.devnull, 'w')):
-        from paddleocr import PaddleOCR
-    PADDLE_AVAILABLE = True
-except Exception as e:
-    PADDLE_AVAILABLE = False
-
-try:
-    import pytesseract
-    TESSERACT_AVAILABLE = True
-except Exception:
-    TESSERACT_AVAILABLE = False
-
-# Optional: Lightweight ML for handwriting (scikit-learn only, no heavy dependencies)
-try:
-    from sklearn.ensemble import RandomForestClassifier
-    from sklearn.preprocessing import StandardScaler
-    SKLEARN_AVAILABLE = True
-except Exception:
-    SKLEARN_AVAILABLE = False
-
-# ── Torch (CPU-only build) ────────────────────────────────────────────────────
-try:
-    import torch
-    TORCH_AVAILABLE = True
-except Exception:
-    torch = None  # type: ignore
-    TORCH_AVAILABLE = False
-
-# ── Transformers (for Qwen2-VL VLM) ──────────────────────────────────────────
-try:
-    from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
-    TRANSFORMERS_AVAILABLE = True
-except Exception:
-    AutoProcessor = None  # type: ignore
-    Qwen2VLForConditionalGeneration = None  # type: ignore
-    TRANSFORMERS_AVAILABLE = False
-
-# ── Qwen-VL utils (image/video message builder) ───────────────────────────────
-try:
-    from qwen_vl_utils import process_vision_info
-    QWEN_VL_UTILS_AVAILABLE = True
-except Exception:
-    process_vision_info = None  # type: ignore
-    QWEN_VL_UTILS_AVAILABLE = False
-
-# ── psutil for memory monitoring ──────────────────────────────────────────────
-try:
-    import psutil as _psutil
-    PSUTIL_AVAILABLE = True
-except Exception:
-    _psutil = None  # type: ignore
-    PSUTIL_AVAILABLE = False
-
-
-def _get_rss_mb() -> float:
-    """Return current process RSS in MB (0 if psutil unavailable)."""
-    if not PSUTIL_AVAILABLE or _psutil is None:
-        return 0.0
-    try:
-        return _psutil.Process().memory_info().rss / 1024 / 1024
-    except Exception:
-        return 0.0
-
+from PIL import Image
+from rapidfuzz import process as rfprocess
 
-def _free_memory() -> None:
-    """Aggressively release Python + PyTorch memory after each request."""
-    gc.collect()
-    # torch.cuda.empty_cache() is a no-op on CPU builds but we call it
-    # defensively in case someone switches to a GPU Space later.
-    if TORCH_AVAILABLE and torch is not None:
-        try:
-            torch.cuda.empty_cache()
-        except Exception:
-            pass
-
-# ============================================================================
-# LOGGING CONFIGURATION
-# ============================================================================
+# ---------------------------------------------------------------------------
+# Logging
+# ---------------------------------------------------------------------------
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler()
-    ]
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    datefmt="%H:%M:%S",
 )
-logger = logging.getLogger(__name__)
-
-# OCR warm-up lifecycle (exposed in /health for HF debugging)
-OCR_WARMUP_STATUS: str = "pending"
-
-# ============================================================================
-# CONFIGURATION MANAGEMENT
-# ============================================================================
-
-class SystemConfig:
-    """Centralized configuration for the entire system"""
-    
-    # Image Processing
-    TARGET_WIDTH = 1200
-    TARGET_HEIGHT = 1600
-    MIN_QUALITY_THRESHOLD = 0.3
-    MAX_IMAGE_SIZE_MB = 10
-    
-    # Preprocessing Parameters
-    CLAHE_CLIP_LIMIT = 2.5
-    CLAHE_TILE_SIZE = (8, 8)
-    SHARPEN_KERNEL = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
-    BLUR_KERNEL = (3, 3)
-    DENOISE_H = 10
-    
-    # OCR Parameters
-    EASYOCR_CONF_THRESH = 0.15        # v5.1: Lowered for robustness
-    EASYOCR_TEXT_THRESH = 0.25        # v5.1: Lowered for robustness
-    EASYOCR_LOW_TEXT = 0.35
-    PADDLE_CONF_THRESH = 0.5
-    TESSERACT_CONF_THRESH = 30
-    
-    # Ensemble Weights
-    EASYOCR_WEIGHT = 0.30
-    PADDLE_WEIGHT = 0.55  # 16GB tier: keep Paddle as primary engine
-    TESSERACT_WEIGHT = 0.25
-    
-    # v5.2: ADAPTIVE QUALITY-BASED THRESHOLDS
-    QUALITY_SHARPNESS_EXCELLENT = 0.80  # Excellent: sharpness > 0.80
-    QUALITY_SHARPNESS_POOR = 0.45      # Poor: sharpness < 0.45
-    AGGRESSIVE_PARSING_THRESHOLD = 0.45 # Enable aggressive parsing if sharpness < this
-    
-    # v5.2: Adaptive threshold multipliers (applied based on quality)
-    ADAPT_THRESH_AGGRESSIVE = 0.10     # Very low quality images
-    ADAPT_THRESH_NORMAL = 0.15         # Normal quality
-    ADAPT_THRESH_STRICT = 0.25         # High quality
-    
-    # v5.2: Confidence boosting
-    CONF_BOOST_LEXICON = 0.12          # Boost if item in lexicon
-    CONF_BOOST_PATTERN = 0.08          # Boost if matches known pattern
-    CONF_BOOST_MATH = 0.15             # Boost if math validates
-    CONF_PENALTY_FALLBACK = -0.15      # Penalty for fallback items
-    
-    # Price/Quantity Validation
-    MAX_REASONABLE_PRICE = 50000.0
-    MIN_ITEM_PRICE = 5.0
-    MAX_QUANTITY = 1000
-    TOTAL_VALIDATION_TOLERANCE = 2.0
-    
-    # Performance
-    MAX_WORKERS = 2
-    CACHE_TTL = 3600        # 1 hour (per spec §9)
-    MAX_CACHE_SIZE = 100    # max 100 cache entries (per spec §7)
-    MAX_PROCESSING_TIME = 60  # seconds
-    NAME_ROI_RATIO = 0.22
-    FASTAPI_HARD_TIMEOUT_SECONDS = float(os.getenv("FASTAPI_HARD_TIMEOUT_SECONDS", "120"))
-
-    # VLM Configuration (v6.0)
-    # Model is loaded lazily on first request to avoid blocking startup.
-    # Set ENABLE_VLM=0 to force EasyOCR-only mode (saves ~8 GB RAM).
-    ENABLE_VLM = os.getenv("ENABLE_VLM", "1").strip().lower() not in ("0", "false", "no", "off")
-    VLM_MODEL_ID = os.getenv("VLM_MODEL_ID", "Qwen/Qwen2-VL-2B-Instruct")
-    VLM_MAX_NEW_TOKENS = int(os.getenv("VLM_MAX_NEW_TOKENS", "512"))
-    VLM_TIMEOUT_SECONDS = float(os.getenv("VLM_TIMEOUT_SECONDS", "60"))
-    # Peak memory guard: if RSS exceeds this MB after VLM load, disable VLM
-    VLM_MEMORY_LIMIT_MB = float(os.getenv("VLM_MEMORY_LIMIT_MB", "12000"))  # 12 GB
-
-    # Paddle CPU tuning (HF CPU-only)
-    PADDLE_CPU_THREADS = int(os.getenv("PADDLE_CPU_THREADS", "4"))
-    PADDLE_USE_MKLDNN = os.getenv("PADDLE_USE_MKLDNN", "1").strip().lower() not in ("0", "false", "no", "off")
-    # Optional: point to pre-downloaded "server" inference models (directories)
-    PADDLE_DET_MODEL_DIR = os.getenv("PADDLE_DET_MODEL_DIR", "").strip()
-    PADDLE_REC_MODEL_DIR = os.getenv("PADDLE_REC_MODEL_DIR", "").strip()
-    PADDLE_CLS_MODEL_DIR = os.getenv("PADDLE_CLS_MODEL_DIR", "").strip()
-    PADDLE_OCR_VERSION = os.getenv("PADDLE_OCR_VERSION", "PP-OCRv4").strip()
-    
-    # Feature Flags (HF CPU: Paddle + EasyOCR ensemble — set ENABLE_PADDLE=0 to force EasyOCR-only)
-    ENABLE_PADDLE = os.getenv("ENABLE_PADDLE", "1").strip().lower() not in ("0", "false", "no", "off")
-    ENABLE_TESSERACT = os.getenv("ENABLE_TESSERACT", "0").strip().lower() in ("1", "true", "yes", "on")
-    ENABLE_SEMANTIC_CORRECTION = True
-    ENABLE_INTELLIGENT_TOTAL = True
-    ENABLE_AGENTIC_LOOP = True
-    
-    # Paths (HF Spaces compatible - using /app/data created in Docker)
-    DATA_ROOT = os.getenv("FEEDBACK_DATA_PATH", "/app/data")
-    MODEL_CACHE = os.getenv("EASYOCR_CACHE", "/.cache")
-    
-    @classmethod
-    def validate(cls):
-        """Validate configuration on startup"""
-        os.makedirs(cls.DATA_ROOT, exist_ok=True)
-        os.makedirs(cls.MODEL_CACHE, exist_ok=True)
-        # Torch / HF caches — keeps EasyOCR+Paddle weights off ephemeral-only paths when possible
-        os.environ.setdefault("TORCH_HOME", cls.MODEL_CACHE)
-        os.environ.setdefault("PADDLEOCR_HOME", os.path.join(cls.MODEL_CACHE, "paddleocr"))
-        # Thread caps (keep deterministic on small CPU)
-        os.environ.setdefault("OMP_NUM_THREADS", str(max(1, cls.PADDLE_CPU_THREADS)))
-        os.environ.setdefault("OPENBLAS_NUM_THREADS", str(max(1, cls.PADDLE_CPU_THREADS)))
-        os.environ.setdefault("MKL_NUM_THREADS", str(max(1, cls.PADDLE_CPU_THREADS)))
-        logger.info(f"✓ Data directory: {cls.DATA_ROOT}")
-        logger.info(f"✓ Model cache: {cls.MODEL_CACHE}")
-    
-    @classmethod
-    def get_adaptive_thresholds(cls, sharpness_score: float) -> Tuple[float, float]:
-        """v5.2: Calculate adaptive OCR thresholds based on image quality (sharpness)"""
-        if sharpness_score < cls.QUALITY_SHARPNESS_POOR:  # Very poor (< 0.45)
-            return (cls.ADAPT_THRESH_AGGRESSIVE, cls.ADAPT_THRESH_AGGRESSIVE * 1.2)
-        elif sharpness_score < 0.60:  # Poor (0.45-0.60)
-            return (cls.EASYOCR_CONF_THRESH * 0.85, cls.EASYOCR_TEXT_THRESH * 0.85)
-        elif sharpness_score > cls.QUALITY_SHARPNESS_EXCELLENT:  # Excellent (> 0.80)
-            return (cls.ADAPT_THRESH_STRICT, cls.ADAPT_THRESH_STRICT * 1.1)
-        else:  # Normal (0.60-0.80)
-            return (cls.ADAPT_THRESH_NORMAL, cls.EASYOCR_TEXT_THRESH)
-
-
-# ============================================================================
-# DATA MODELS (Pydantic)
-# ============================================================================
-
-class ProcessingStatus(str, Enum):
-    PENDING = "pending"
-    PROCESSING = "processing"
-    COMPLETED = "completed"
-    PARTIAL = "partial"
-    FAILED = "failed"
-
-
-class EntityType(str, Enum):
-    CUSTOMER_NAME = "customer_name"
-    ITEM_NAME = "item_name"
-    QUANTITY = "quantity"
-    PRICE = "price"
-    TOTAL = "total"
-    UNIT = "unit"
-
-
-class ConfidenceLevel(str, Enum):
-    HIGH = "high"      # >0.85
-    MEDIUM = "medium"  # 0.65-0.85
-    LOW = "low"        # 0.45-0.65
-    VERY_LOW = "very_low"  # <0.45
-
-
-class BoundingBox(BaseModel):
-    """Bounding box coordinates for visual grounding"""
-    x1: float = Field(ge=0, le=1)
-    y1: float = Field(ge=0, le=1)
-    x2: float = Field(ge=0, le=1)
-    y2: float = Field(ge=0, le=1)
-    
-    @classmethod
-    def from_cv_bbox(cls, bbox: List, img_shape: Tuple[int, int]) -> 'BoundingBox':
-        h, w = img_shape[:2]
-        if isinstance(bbox, list) and len(bbox) >= 4:
-            x_coords = [p[0] for p in bbox]
-            y_coords = [p[1] for p in bbox]
-            return cls(
-                x1=max(0, min(x_coords)) / w,
-                y1=max(0, min(y_coords)) / h,
-                x2=min(w, max(x_coords)) / w,
-                y2=min(h, max(y_coords)) / h
-            )
-        return cls(x1=0, y1=0, x2=1, y2=1)
-
-
-class ExtractedItem(BaseModel):
-    """Structured item extraction result"""
-    name: str = Field(description="Item name (cleaned and normalized)")
-    quantity: float = Field(default=1.0, gt=0, description="Quantity")
-    price: float = Field(gt=0, description="Price in PKR")
-    unit: str = Field(default="pc", description="Unit (kg, g, liter, dozen, pc)")
-    confidence: float = Field(ge=0, le=1, description="Extraction confidence")
-    original_text: str = Field(default="", description="Raw OCR text")
-    bounding_box: Optional[BoundingBox] = None
-    semantic_match: Optional[str] = None
-    semantic_score: float = 0.0
-    
-    class Config:
-        json_schema_extra = {
-            "example": {
-                "name": "atta",
-                "quantity": 2.0,
-                "price": 200.0,
-                "unit": "kg",
-                "confidence": 0.92,
-                "original_text": "Atta-2 kg 200"
-            }
-        }
-
-
-class ProcessingResult(BaseModel):
-    """Final processing result"""
-    request_id: str
-    success: bool
-    customer_name: Optional[str] = None
-    items: List[ExtractedItem] = []
-    # Mobile compatibility alias for legacy clients expecting item/qty/price rows.
-    items_list: List[Dict[str, str]] = []
-    # Explicit alias some clients read first.
-    line_items: List[Dict[str, str]] = []
-    total_amount: float = 0.0
-    # Mobile compatibility alias
-    total: float = 0.0
-    transaction_type: str = "unknown"
-    # Mobile compatibility alias
-    type: str = "unknown"
-    # True when paper "Total" differs from computed (shopkeeper rule).
-    mismatch: bool = False
-    # Strict client-friendly overall confidence score (0..1)
-    confidence_score: float = 0.0
-    confidence: Dict[str, Union[float, str, bool]] = Field(default_factory=dict)
-    processing_time_ms: int = 0
-    status: ProcessingStatus = ProcessingStatus.PENDING
-    errors: List[str] = []
-    warnings: List[str] = []
-    hitl_data: Dict[str, Any] = Field(
-        default_factory=lambda: {"name_review_required": False, "name_candidates": []}
-    )
-    metadata: Dict[str, Any] = Field(default_factory=dict)
-    # Mobile compatibility alias
-    meta: Dict[str, Any] = Field(default_factory=dict)
-    
-    class Config:
-        json_schema_extra = {
-            "example": {
-                "request_id": "abc-123",
-                "success": True,
-                "customer_name": "Umar",
-                "items": [],
-                "total_amount": 950.0,
-                "transaction_type": "udhaar",
-                "confidence": {"items": 0.85, "total": 0.95},
-                "processing_time_ms": 2345
-            }
-        }
-
-
-# ============================================================================
-# ADVANCED IMAGE PREPROCESSOR (Pass 1)
-# ============================================================================
-
-class AdvancedImagePreprocessor:
-    """
-    Pass 1: Multi-stage image enhancement for handwritten parchis.
-    
-    Stages:
-    1. Orientation correction (0/90/180/270)
-    2. Perspective correction (document flattening)
-    3. Shadow removal (CLAHE)
-    4. Noise reduction (Non-local means)
-    5. Stroke width normalization (morphological)
-    6. Sharpening (unsharp mask)
-    7. Binarization (adaptive threshold)
-    """
-    
-    def __init__(self, config: SystemConfig):
-        self.config = config
-        
-    def analyze_image_quality(self, image: np.ndarray) -> Dict[str, float]:
-        """Analyze image quality metrics"""
-        if len(image.shape) == 3:
-            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-        else:
-            gray = image
-            
-        # Sharpness (Laplacian variance)
-        sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()
-        
-        # Brightness
-        brightness = np.mean(gray)
-        
-        # Contrast (standard deviation)
-        contrast = gray.std()
-        
-        # Noise level
-        noise = np.std(gray - cv2.GaussianBlur(gray, (5, 5), 0))
-        
-        # Normalized scores (0-1)
-        sharpness_score = min(1.0, sharpness / 500)
-        brightness_score = brightness / 255
-        contrast_score = min(1.0, contrast / 100)
-        noise_score = max(0.0, 1.0 - (noise / 50))
-        
-        overall_quality = (sharpness_score + brightness_score + contrast_score + noise_score) / 4
-        
-        return {
-            "sharpness": round(sharpness_score, 3),
-            "brightness": round(brightness_score, 3),
-            "contrast": round(contrast_score, 3),
-            "noise": round(noise_score, 3),
-            "overall": round(overall_quality, 3)
-        }
-    
-    def auto_orient(self, image: np.ndarray) -> np.ndarray:
-        """
-        Auto-detect and correct image orientation using edge detection
-        """
-        try:
-            height, width = image.shape[:2]
-            if min(height, width) < 100:
-                return image
-                
-            # Convert to grayscale and resize for speed
-            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-            small = cv2.resize(gray, (max(400, width//3), max(300, height//3)))
-            
-            # Detect edges
-            edges = cv2.Canny(small, 50, 150)
-            
-            # Hough line detection
-            lines = cv2.HoughLines(edges, 1, np.pi/180, threshold=int(len(small) * 0.3))
-            
-            if lines is not None:
-                angles = []
-                for line in lines[:20]:  # Limit to 20 lines
-                    rho, theta = line[0]
-                    angle = theta * 180 / np.pi - 90
-                    if -45 < angle < 45:
-                        angles.append(angle)
-                
-                if angles:
-                    median_angle = np.median(angles)
-                    if abs(median_angle) > 3:
-                        # Rotate image
-                        h, w = image.shape[:2]
-                        center = (w // 2, h // 2)
-                        M = cv2.getRotationMatrix2D(center, median_angle, 1.0)
-                        rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
-                        return rotated
-            
-            return image
-        except Exception as e:
-            logger.warning(f"Orientation correction failed: {e}")
-            return image
-    
-    def perspective_correction(self, image: np.ndarray) -> np.ndarray:
-        """
-        Apply perspective correction to flatten warped receipts
-        """
-        try:
-            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-            blur = cv2.GaussianBlur(gray, (5, 5), 0)
-            edges = cv2.Canny(blur, 50, 150)
-            
-            # Find contours
-            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            
-            if not contours:
-                return image
-                
-            # Find largest contour
-            largest_contour = max(contours, key=cv2.contourArea)
-            
-            # Approximate polygon
-            peri = cv2.arcLength(largest_contour, True)
-            approx = cv2.approxPolyDP(largest_contour, 0.02 * peri, True)
-            
-            # If we found 4 corners, apply perspective transform
-            if len(approx) == 4:
-                pts = approx.reshape(4, 2)
-                rect = self._order_points(pts)
-                warped = self._four_point_transform(image, rect)
-                return warped
-            
-            return image
-        except Exception as e:
-            logger.warning(f"Perspective correction failed: {e}")
-            return image
-    
-    def _order_points(self, pts: np.ndarray) -> np.ndarray:
-        """Order points in clockwise order"""
-        rect = np.zeros((4, 2), dtype=np.float32)
-        s = pts.sum(axis=1)
-        rect[0] = pts[np.argmin(s)]
-        rect[2] = pts[np.argmax(s)]
-        diff = np.diff(pts, axis=1)
-        rect[1] = pts[np.argmin(diff)]
-        rect[3] = pts[np.argmax(diff)]
-        return rect
-    
-    def _four_point_transform(self, image: np.ndarray, pts: np.ndarray) -> np.ndarray:
-        """Apply perspective transform"""
-        (tl, tr, br, bl) = pts
-        width_a = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
-        width_b = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
-        max_width = max(int(width_a), int(width_b))
-        height_a = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
-        height_b = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
-        max_height = max(int(height_a), int(height_b))
-        dst = np.array([[0, 0], [max_width - 1, 0], [max_width - 1, max_height - 1], [0, max_height - 1]], dtype=np.float32)
-        M = cv2.getPerspectiveTransform(pts, dst)
-        warped = cv2.warpPerspective(image, M, (max_width, max_height))
-        return warped
-    
-    def enhance_image(self, rgb: np.ndarray) -> np.ndarray:
-        """
-        Full enhancement pipeline
-        """
-        # 1. Orientation correction
-        oriented = self.auto_orient(rgb)
-        
-        # 2. Perspective correction
-        perspective = self.perspective_correction(oriented)
-        
-        # Convert to grayscale
-        gray = cv2.cvtColor(perspective, cv2.COLOR_RGB2GRAY)
-        
-        # 3. CLAHE for shadow removal and contrast enhancement
-        clahe = cv2.createCLAHE(clipLimit=self.config.CLAHE_CLIP_LIMIT, tileGridSize=self.config.CLAHE_TILE_SIZE)
-        clahe_img = clahe.apply(gray)
-        
-        # 4. Denoise (Non-local means)
-        denoised = cv2.fastNlMeansDenoising(clahe_img, h=self.config.DENOISE_H)
-        
-        # 5. Sharpen
-        sharpened = cv2.filter2D(denoised, -1, self.config.SHARPEN_KERNEL)
-        
-        # 6. Morphological closing to connect broken strokes
-        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
-        morphed = cv2.morphologyEx(sharpened, cv2.MORPH_CLOSE, kernel)
-        
-        # 7. Adaptive thresholding for binarization
-        binary = cv2.adaptiveThreshold(
-            morphed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-            cv2.THRESH_BINARY, 15, 5
-        )
-        
-        # Return RGB format for OCR compatibility
-        return cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB)
-    
-    def generate_variants(self, enhanced: np.ndarray) -> List[np.ndarray]:
-        """Generate multiple processing variants for ensemble"""
-        variants = [enhanced]
-        
-        # Inverted variant (for light text on dark background)
-        inverted = cv2.bitwise_not(enhanced)
-        variants.append(inverted)
-        
-        # High contrast variant
-        gray = cv2.cvtColor(enhanced, cv2.COLOR_RGB2GRAY)
-        _, high_contrast = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-        variants.append(cv2.cvtColor(high_contrast, cv2.COLOR_GRAY2RGB))
-        
-        # Morphological variant (thickened strokes)
-        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
-        dilated = cv2.dilate(gray, kernel, iterations=1)
-        variants.append(cv2.cvtColor(dilated, cv2.COLOR_GRAY2RGB))
-        
-        return variants[:4]  # Limit to 4 variants for speed
-
-
-# ============================================================================
-# MULTI-ENGINE OCR ENSEMBLE (Pass 2)
-# ============================================================================
-
-class OCRTextBlock(TypedDict):
-    """Typed dictionary for OCR results"""
-    text: str
-    confidence: float
-    bbox: List[List[float]]
-    line_number: int
-    engine: str
-
-
-class MultiEngineOCR:
-    """
-    Pass 2: Multi-engine OCR with intelligent result merging.
-    
-    Engines:
-    - EasyOCR (primary, good for Urdu)
-    - PaddleOCR (fallback, good for numbers)
-    - Tesseract (secondary fallback)
-    """
-    
-    def __init__(self, config: SystemConfig):
-        self.config = config
-        self._easyocr = None
-        self._paddle = None
-        self._initialized = False
-        self._init_lock = threading.Lock()
-        # v5.2: Debug metrics for parser robustness
-        self.easyocr_rows_skipped = 0
-        self.paddle_rows_skipped = 0
-
-    @staticmethod
-    def _extract_text_conf(entry: Any) -> Tuple[Optional[str], Optional[float]]:
-        """
-        Flexible OCR unpacking:
-        - [text, conf]
-        - [[box], (text, conf)]
-        - [[box], [text, conf]]
-        """
-        try:
-            # flat pair e.g. [text, conf]
-            if isinstance(entry, (list, tuple)) and len(entry) == 2:
-                a, b = entry
-                if isinstance(a, str):
-                    return a, float(b) if b is not None else None
-                if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)) and len(b) >= 2:
-                    return str(b[0]), float(b[1])
-                if isinstance(a, (list, tuple)) and isinstance(b, str):
-                    return b, None
-            # nested fallback: use last tuple/list if looks like (text, conf)
-            if isinstance(entry, (list, tuple)) and entry:
-                tail = entry[-1]
-                if isinstance(tail, (list, tuple)) and len(tail) >= 2:
-                    return str(tail[0]), float(tail[1])
-        except Exception:
-            return None, None
-        return None, None
-        
-    def initialize(self):
-        """One-time OCR engine init (call from startup — avoids first-request model download stall)."""
-        with self._init_lock:
-            if self._initialized:
-                return
-
-            if EASYOCR_AVAILABLE and easyocr is not None:
-                logger.info("Initializing EasyOCR (models cached under %s)...", SystemConfig.MODEL_CACHE)
-                try:
-                    # Suppress EasyOCR model download progress bars
-                    with redirect_stdout(open(os.devnull, 'w')), redirect_stderr(open(os.devnull, 'w')):
-                        self._easyocr = easyocr.Reader(
-                            ["en", "ur"],
-                            gpu=False,
-                            model_storage_directory=SystemConfig.MODEL_CACHE,
-                            verbose=False,
-                        )
-                except Exception as exc:
-                    logger.error(f"EasyOCR init failed: {exc}")
-                    self._easyocr = None
-            else:
-                logger.warning("EasyOCR not available — fallback to Paddle if enabled.")
-
-            if self.config.ENABLE_PADDLE and PADDLE_AVAILABLE:
-                logger.info("Initializing PaddleOCR (CPU)...")
-                paddle_kw = {
-                    "lang": "en",
-                    "use_angle_cls": True,
-                    "det_limit_side_len": 960,
-                    # CPU scaling
-                    "use_mkldnn": bool(SystemConfig.PADDLE_USE_MKLDNN),
-                    "cpu_threads": int(max(1, SystemConfig.PADDLE_CPU_THREADS)),
-                    # Prefer modern default models
-                    "ocr_version": SystemConfig.PADDLE_OCR_VERSION or "PP-OCRv4",
-                }
-                # Optional server-model dirs (if you provide them in the container cache)
-                if SystemConfig.PADDLE_DET_MODEL_DIR:
-                    paddle_kw["det_model_dir"] = SystemConfig.PADDLE_DET_MODEL_DIR
-                if SystemConfig.PADDLE_REC_MODEL_DIR:
-                    paddle_kw["rec_model_dir"] = SystemConfig.PADDLE_REC_MODEL_DIR
-                if SystemConfig.PADDLE_CLS_MODEL_DIR:
-                    paddle_kw["cls_model_dir"] = SystemConfig.PADDLE_CLS_MODEL_DIR
-                try:
-                    # Suppress PaddleOCR model download progress bars
-                    with redirect_stdout(open(os.devnull, 'w')), redirect_stderr(open(os.devnull, 'w')):
-                        try:
-                            self._paddle = PaddleOCR(**paddle_kw)
-                        except TypeError:
-                            # Fallback for older PaddleOCR versions: drop newer kwargs
-                            safe_kw = {"lang": "en", "use_angle_cls": True, "det_limit_side_len": 960}
-                            self._paddle = PaddleOCR(**safe_kw)
-                        except AttributeError as exc:
-                            # Seen in some builds: AnalysisConfig missing optimization methods.
-                            # Retry with MKLDNN disabled + smaller kwargs to avoid inference config path.
-                            logger.warning("PaddleOCR init attribute error, retrying safe-mode: %s", exc)
-                            safe_kw = {"lang": "en", "use_angle_cls": True, "det_limit_side_len": 960, "cpu_threads": 2}
-                            self._paddle = PaddleOCR(**safe_kw)
-                except Exception as exc:
-                    logger.error("PaddleOCR init failed (will use EasyOCR only): %s", exc)
-                    self._paddle = None
-                    # Disable paddle for this session
-                    self.config.ENABLE_PADDLE = False
-
-            self._initialized = True
-            logger.info(
-                "OCR engines ready | easyocr=%s paddle=%s",
-                bool(self._easyocr),
-                bool(self._paddle),
-            )
-    
-    def run_easyocr(self, image: np.ndarray) -> List[OCRTextBlock]:
-        """Run EasyOCR and format results"""
-        results = []
-        self.easyocr_rows_skipped = 0  # v5.2: Reset counter per call
-        if not self._easyocr:
-            return results
-        try:
-            raw_results = self._easyocr.readtext(
-                image,
-                detail=1,
-                paragraph=False,
-                width_ths=0.5,
-                ycenter_ths=0.5,
-                text_threshold=self.config.EASYOCR_TEXT_THRESH,
-                low_text=self.config.EASYOCR_LOW_TEXT,
-                link_threshold=0.3,
-                mag_ratio=1.5
-            )
-            
-            for row in raw_results:
-                try:
-                    bbox = row[0] if isinstance(row, (list, tuple)) and len(row) > 0 else None
-                    text, conf = self._extract_text_conf(row)
-                    if not text or conf is None:
-                        self.easyocr_rows_skipped += 1  # v5.2: Track skipped rows
-                        continue
-                    if conf >= self.config.EASYOCR_CONF_THRESH and text.strip() and bbox:
-                        y_center = (bbox[0][1] + bbox[2][1]) / 2
-                        results.append(
-                            OCRTextBlock(
-                                text=str(text).strip(),
-                                confidence=float(conf) * self.config.EASYOCR_WEIGHT,
-                                bbox=[[float(p[0]), float(p[1])] for p in bbox],
-                                line_number=int(y_center // 20),
-                                engine="easyocr",
-                            )
-                        )
-                    else:
-                        self.easyocr_rows_skipped += 1  # v5.2: Track confidence/threshold failures
-                except Exception as parse_exc:
-                    logger.warning("EasyOCR line skipped (unpack issue): %s", parse_exc)
-                    self.easyocr_rows_skipped += 1  # v5.2: Track parse exceptions
-                    continue
-        except Exception as e:
-            logger.error(f"EasyOCR failed: {e}")
-        return results
-    
-    def run_paddle(self, image: np.ndarray) -> List[OCRTextBlock]:
-        """Run PaddleOCR if available"""
-        results = []
-        self.paddle_rows_skipped = 0  # v5.2: Reset counter per call
-        if not self.config.ENABLE_PADDLE or not self._paddle:
-            return results
-            
-        try:
-            # Keep Paddle prediction call API-stable across versions.
-            # Some runtime builds fail on explicit `cls=` argument.
-            with redirect_stdout(open(os.devnull, 'w')), redirect_stderr(open(os.devnull, 'w')):
-                raw_results = self._paddle.ocr(image)
-            blocks = raw_results if isinstance(raw_results, list) else [raw_results]
-            for block in blocks:
-                if not block:
-                    continue
-                for line in block:
-                    try:
-                        bbox = line[0] if isinstance(line, (list, tuple)) and len(line) > 0 else None
-                        text, conf = self._extract_text_conf(line)
-                        if not bbox or not text or conf is None:
-                            self.paddle_rows_skipped += 1  # v5.2: Track skipped rows
-                            continue
-                        if conf >= self.config.PADDLE_CONF_THRESH and str(text).strip():
-                            y_center = (float(bbox[0][1]) + float(bbox[2][1])) / 2.0
-                            results.append(
-                                OCRTextBlock(
-                                    text=str(text).strip(),
-                                    confidence=float(conf) * self.config.PADDLE_WEIGHT,
-                                    bbox=[[float(p[0]), float(p[1])] for p in bbox],
-                                    line_number=int(y_center // 20),
-                                    engine="paddle",
-                                )
-                            )
-                        else:
-                            self.paddle_rows_skipped += 1  # v5.2: Track confidence/threshold failures
-                    except Exception as parse_exc:
-                        logger.warning("Paddle line skipped (unpack issue): %s", parse_exc)
-                        self.paddle_rows_skipped += 1  # v5.2: Track parse exceptions
-                        continue
-        except Exception as e:
-            error_str = str(e)
-            # Check if it's the known compatibility error
-            if "ConvertPirAttribute2RuntimeAttribute" in error_str or "Unimplemented" in error_str:
-                logger.warning("PaddleOCR version compatibility issue - disabling for this session: %s", e)
-                self._paddle = None
-                self.config.ENABLE_PADDLE = False
-            else:
-                logger.error(f"PaddleOCR failed: {e}")
-        return results
-
-    def run_easyocr_paragraph_lines(self, image: np.ndarray) -> List[str]:
-        """Extra pass: paragraph mode often recovers line breaks lost in token mode."""
-        if not self._easyocr:
-            return []
-        try:
-            paras = self._easyocr.readtext(image, detail=0, paragraph=True)
-            if isinstance(paras, str):
-                return [normalize_ocr_text(paras)] if paras.strip() else []
-            out: List[str] = []
-            for p in paras or []:
-                t = normalize_ocr_text(str(p).strip())
-                if t:
-                    out.append(t)
-            return out
-        except Exception as exc:
-            logger.warning("EasyOCR paragraph pass skipped: %s", exc)
-            return []
-
-    @staticmethod
-    def _bbox_y_range(bb: List[List[float]]) -> Tuple[float, float]:
-        ys = [float(p[1]) for p in bb]
-        return min(ys), max(ys)
-
-    @staticmethod
-    def _bbox_x_range(bb: List[List[float]]) -> Tuple[float, float]:
-        xs = [float(p[0]) for p in bb]
-        return min(xs), max(xs)
-
-    @staticmethod
-    def _vertical_overlap_ratio(bb1: List[List[float]], bb2: List[List[float]]) -> float:
-        y1a, y1b = MultiEngineOCR._bbox_y_range(bb1)
-        y2a, y2b = MultiEngineOCR._bbox_y_range(bb2)
-        inter = max(0.0, min(y1b, y2b) - max(y1a, y2a))
-        h = max(min(y1b - y1a, y2b - y2a), 1.0)
-        return inter / h
-
-    @staticmethod
-    def _horizontal_near(bb1: List[List[float]], bb2: List[List[float]], gap_tol: float = 48.0) -> bool:
-        x1a, x1b = MultiEngineOCR._bbox_x_range(bb1)
-        x2a, x2b = MultiEngineOCR._bbox_x_range(bb2)
-        return not (x1b < x2a - gap_tol or x2b < x1a - gap_tol)
-
-    @staticmethod
-    def _fuse_digit_hints(primary: str, hints: List[str]) -> str:
-        """Keep Paddle layout; borrow digit shapes from EasyOCR overlaps (0 vs O, 1 vs l)."""
-        if not hints:
-            return primary
-        hint_join = " ".join(hints)
-        t = primary.replace("O", "0").replace("o", "0").replace("l", "1").replace("I", "1")
-        hj = hint_join.replace("O", "0").replace("o", "0").replace("l", "1").replace("I", "1")
-        # If digit-only skeletons match length, prefer hint digit run for numeric tokens
-        def digits(s: str) -> str:
-            return re.sub(r"\D", "", s)
-
-        if digits(hint_join) and digits(primary) and abs(len(digits(hint_join)) - len(digits(primary))) <= 1:
-            if digits(hint_join) != digits(primary) and len(digits(hint_join)) >= len(digits(primary)) - 1:
-                return hint_join.strip() if len(hint_join) <= len(primary) + 6 else t
-        return t
-
-    def fuse_confidence_spatial_layout(
-        self, paddle_blocks: List[OCRTextBlock], easy_blocks: List[OCRTextBlock]
-    ) -> List[OCRTextBlock]:
-        """
-        Confidence winner: Paddle supplies reading order + boxes; EasyOCR refines digit glyphs on overlaps.
-        """
-        if not paddle_blocks:
-            return self.merge_results([easy_blocks, []]) if easy_blocks else []
-        if not easy_blocks:
-            return list(paddle_blocks)
-
-        def yc(b: OCRTextBlock) -> float:
-            bb = b["bbox"]
-            return (float(bb[0][1]) + float(bb[2][1])) / 2.0
-
-        def xc(b: OCRTextBlock) -> float:
-            return MultiEngineOCR._bbox_x_range(b["bbox"])[0]
-
-        fused: List[OCRTextBlock] = []
-        for pb in sorted(paddle_blocks, key=lambda b: (yc(b), xc(b))):
-            hints = [
-                eb["text"]
-                for eb in easy_blocks
-                if self._vertical_overlap_ratio(pb["bbox"], eb["bbox"]) >= 0.32
-                and self._horizontal_near(pb["bbox"], eb["bbox"])
-            ]
-            txt = self._fuse_digit_hints(pb["text"], hints).strip()
-            # Numeric-aware confidence: Paddle tends to be more reliable on digits/prices.
-            is_numeric = bool(re.fullmatch(r"[\d\.,]+", re.sub(r"\s+", "", txt)))
-            conf_boost = 1.10 if is_numeric else 1.04
-            fused.append(
-                OCRTextBlock(
-                    text=txt or pb["text"].strip(),
-                    confidence=min(1.0, float(pb["confidence"]) * conf_boost),
-                    bbox=pb["bbox"],
-                    line_number=pb["line_number"],
-                    engine="fused_paddle_easy",
+log = logging.getLogger("parchi-ocr")
+
+# ---------------------------------------------------------------------------
+# Config  (centralised – change here, takes effect everywhere)
+# ---------------------------------------------------------------------------
+class Config:
+    TARGET_WIDTH        = 1200        # px – resize before OCR if larger
+    MAX_IMAGE_SIZE_MB   = 20
+    MIN_ITEM_PRICE      = 1           # ignore prices below this (noise)
+    MAX_ITEM_PRICE      = 100_000     # ignore prices above this (noise)
+    MAX_ITEM_QTY        = 1_000
+    NAME_ROI_RATIO      = 0.15
+    TOTAL_ROI_RATIO     = 0.25
+
+# ---------------------------------------------------------------------------
+# Lexicon  (two-tier: fast dict lookup THEN rapidfuzz fallback)
+# ---------------------------------------------------------------------------
+
+# Tier-1: explicit variant → canonical (O(1), zero false-positives)
+ITEM_CORRECTIONS: dict[str, str] = {
+    # atta / flour
+    "aata": "atta", "arta": "atta", "ata": "atta", "flour": "atta",
+    # cheeni / sugar
+    "chini": "cheeni", "sugar": "cheeni", "cheeny": "cheeni", "cheni": "cheeni",
+    # chawal / rice
+    "rice": "chawal",
+    # daal / lentils
+    "dal": "daal", "lentils": "daal", "lentil": "daal",
+    # ghee / oil
+    "desi ghee": "ghee", "tel": "oil",
+    # doodh / milk
+    "milk": "doodh", "dudh": "doodh",
+    # spices
+    "salt": "namak", "chili": "mirch", "turmeric": "haldi", "cumin": "zeera",
+    # personal care
+    "soap": "sabun", "sabon": "sabun",
+    # snacks / bakery
+    "buger": "burger", "bubiger": "burger", "buggar": "burger",
+    "bisconni": "biscuit", "double roti": "bread",
+    # eggs
+    "anday": "anda", "egg": "anda", "eggs": "anda",
+    # dairy
+    "yogurt": "dahi", "butter": "makhan",
+    # vegetables
+    "potato": "aloo", "onion": "pyaz", "tomato": "tamatar",
+    "meat": "gosht", "chicken": "murgi",
+}
+
+# Tier-2: flat list for rapidfuzz similarity (conservative fallback)
+LEXICON: list[str] = list(set(ITEM_CORRECTIONS.values())) + [
+    "sooji", "besan", "makai", "dhaniya", "chai", "paneer",
+    "sabzi", "roti", "cream", "shampoo", "hammam",
+]
+
+# keywords that signal totals / transaction type
+TOTAL_KW    = re.compile(r"(total|ٹوٹل|کل|jama|جمع|sum|amount)", re.I)
+UDHAAR_KW   = re.compile(r"(udhaar|ادھار|credit|baaki|باقی)", re.I)
+WASOOLI_KW  = re.compile(r"(wasooli|وصولی|received|payment|paid)", re.I)
+CASH_KW     = re.compile(r"(cash|نقد|naqd)", re.I)
+
+# Urdu digit map
+URDU_DIGITS = str.maketrans("۰۱۲۳۴۵۶۷۸۹", "0123456789")
+
+# ---------------------------------------------------------------------------
+# LRU Cache (SHA-256 keyed, 24 h TTL, 500 entries)
+# ---------------------------------------------------------------------------
+_CACHE_MAX   = 500
+_CACHE_TTL_S = 86_400          # 24 h
+
+class _LRUCache:
+    def __init__(self, maxsize: int = _CACHE_MAX):
+        self._store: OrderedDict[str, tuple[float, Any]] = OrderedDict()
+        self._max   = maxsize
+
+    def _evict_expired(self):
+        now = time.monotonic()
+        stale = [k for k, (ts, _) in self._store.items() if now - ts > _CACHE_TTL_S]
+        for k in stale:
+            del self._store[k]
+
+    def get(self, key: str) -> Any | None:
+        self._evict_expired()
+        if key not in self._store:
+            return None
+        self._store.move_to_end(key)
+        return self._store[key][1]
+
+    def set(self, key: str, value: Any):
+        self._evict_expired()
+        if key in self._store:
+            self._store.move_to_end(key)
+        self._store[key] = (time.monotonic(), value)
+        if len(self._store) > self._max:
+            self._store.popitem(last=False)
+
+_cache = _LRUCache()
+
+# ---------------------------------------------------------------------------
+# EasyOCR Reader  (lazy singleton, warm on first request)
+# ---------------------------------------------------------------------------
+_reader: easyocr.Reader | None = None
+_reader_lock = threading.Lock()          # double-checked locking for thread safety
+
+def get_reader() -> easyocr.Reader:
+    global _reader
+    if _reader is None:
+        with _reader_lock:
+            if _reader is None:          # re-check after acquiring lock
+                log.info("Initialising EasyOCR reader [ur, en] …")
+                _reader = easyocr.Reader(
+                    ["ur", "en"],
+                    gpu=False,
+                    model_storage_directory="/tmp/easyocr_models",
+                    download_enabled=True,
+                    verbose=False,
                 )
-            )
-        # Easy-only tokens (Paddle missed)
-        for eb in easy_blocks:
-            if not any(
-                self._vertical_overlap_ratio(pb["bbox"], eb["bbox"]) >= 0.22
-                and self._horizontal_near(pb["bbox"], eb["bbox"])
-                for pb in paddle_blocks
-            ):
-                fused.append(eb)
-        return sorted(fused, key=lambda b: (yc(b), xc(b)))
-
-    def merge_results(self, results_list: List[List[OCRTextBlock]]) -> List[OCRTextBlock]:
-        """Merge results from multiple engines using spatial clustering"""
-        all_results = []
-        for results in results_list:
-            all_results.extend(results)
-            
-        if not all_results:
-            return []
-            
-        # Group by spatial proximity
-        groups = self._cluster_by_position(all_results)
-        
-        merged = []
-        for group in groups:
-            if len(group) == 1:
-                merged.append(group[0])
-            else:
-                merged.append(self._merge_group(group))
-                
-        merged.sort(key=lambda x: x['line_number'])
-        return merged
-    
-    def _cluster_by_position(self, results: List[OCRTextBlock], threshold: float = 40.0) -> List[List[OCRTextBlock]]:
-        """Group OCR results by vertical position"""
-        if not results:
-            return []
-            
-        sorted_results = sorted(results, key=lambda x: x['line_number'])
-        groups = []
-        current_group = [sorted_results[0]]
-        
-        for result in sorted_results[1:]:
-            if abs(result['line_number'] - current_group[-1]['line_number']) <= threshold / 20:
-                current_group.append(result)
-            else:
-                groups.append(current_group)
-                current_group = [result]
-                
-        if current_group:
-            groups.append(current_group)
-            
-        return groups
-    
-    def _merge_group(self, group: List[OCRTextBlock]) -> OCRTextBlock:
-        """Merge multiple detections of the same text"""
-        group.sort(key=lambda x: x['confidence'], reverse=True)
-        best = group[0]
-        
-        # Find consensus text
-        texts = [g['text'] for g in group]
-        consensus = max(set(texts), key=texts.count) if texts else best['text']
-        
-        # Average confidence
-        avg_conf = sum(g['confidence'] for g in group) / len(group)
-        
-        return OCRTextBlock(
-            text=consensus,
-            confidence=min(1.0, avg_conf * 1.1),  # Boost merged confidence
-            bbox=best['bbox'],
-            line_number=best['line_number'],
-            engine="merged"
-        )
-    
-    def extract_text_lines(self, image: np.ndarray) -> Tuple[List[str], List[OCRTextBlock]]:
-        """Extract text lines from image using all available engines"""
-        self.initialize()
-
-        easyocr_results = self.run_easyocr(image)
-        paddle_results = self.run_paddle(image)
-        if paddle_results and easyocr_results:
-            merged = self.fuse_confidence_spatial_layout(paddle_results, easyocr_results)
-        elif paddle_results:
-            merged = list(paddle_results)
-        else:
-            merged = self.merge_results([easyocr_results, paddle_results])
-
-        # Group tokens by true vertical center (avoids collapsing many lines when line_number buckets match).
-        def y_center(rb: OCRTextBlock) -> float:
-            bb = rb["bbox"]
-            if not bb or len(bb) < 3:
-                return float(rb["line_number"] * 20)
-            return (float(bb[0][1]) + float(bb[2][1])) / 2.0
-
-        lines: List[str] = []
-        if merged:
-            sorted_blocks = sorted(merged, key=y_center)
-            row_tol = 28.0
-            current_line: List[str] = []
-            current_y: Optional[float] = None
-            for result in sorted_blocks:
-                yc = y_center(result)
-                if current_y is None or abs(yc - current_y) <= row_tol:
-                    current_line.append(result["text"])
-                    if current_y is None:
-                        current_y = yc
-                    else:
-                        current_y = (current_y * (len(current_line) - 1) + yc) / len(current_line)
-                else:
-                    if current_line:
-                        lines.append(" ".join(current_line))
-                    current_line = [result["text"]]
-                    current_y = yc
-            if current_line:
-                lines.append(" ".join(current_line))
-
-        # Supplement with paragraph-mode lines when token merge is thin (common on synthetic / dense slips).
-        if len(lines) < 3:
-            seen_norm = {norm(x) for x in lines}
-            for pl in self.run_easyocr_paragraph_lines(image):
-                key = norm(pl)
-                if len(key) > 2 and key not in seen_norm:
-                    lines.append(pl)
-                    seen_norm.add(key)
-
-        return lines, merged
-
-    def extract_top_roi_name_lines(self, image: np.ndarray, roi_ratio: float = 0.22) -> List[str]:
-        """
-        Name-focused micro-pass:
-        Run OCR only on top ROI with a couple of variants so customer name
-        has a dedicated extraction path independent from item parsing.
-        """
-        self.initialize()
-        if image is None or not isinstance(image, np.ndarray) or image.size == 0:
-            return []
-        try:
-            h, _w = image.shape[:2]
-            cut = max(40, int(h * max(0.10, min(roi_ratio, 0.35))))
-            top = image[:cut, :].copy()
-            if top.size == 0:
-                return []
-            variants: List[np.ndarray] = [top]
-            try:
-                gray = cv2.cvtColor(top, cv2.COLOR_RGB2GRAY)
-                variants.append(cv2.cvtColor(cv2.bitwise_not(gray), cv2.COLOR_GRAY2RGB))
-                # Slightly contrast-boosted for weak handwriting.
-                clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(6, 6))
-                variants.append(cv2.cvtColor(clahe.apply(gray), cv2.COLOR_GRAY2RGB))
-            except Exception:
-                pass
-
-            out: List[str] = []
-            seen: set[str] = set()
-            for var_img in variants[:3]:
-                blocks = self.run_paddle(var_img) + self.run_easyocr(var_img)
-                for b in blocks:
-                    t = normalize_ocr_text(str(b.get("text", "")).strip())
-                    k = norm(t)
-                    if len(k) >= 2 and k not in seen:
-                        out.append(t)
-                        seen.add(k)
-            return out
-        except Exception as exc:
-            logger.warning("Top ROI name micro-pass failed: %s", exc)
-            return []
-
-    def extract_bottom_roi_total_lines(self, image: np.ndarray, roi_ratio: float = 0.38) -> List[str]:
-        """
-        Fast total-focused micro-pass:
-        Only OCR bottom ROI to recover totals/paid/balance quickly when math mismatch is detected.
-        """
-        self.initialize()
-        if image is None or not isinstance(image, np.ndarray) or image.size == 0:
-            return []
-        try:
-            h, _w = image.shape[:2]
-            cut = max(80, int(h * max(0.20, min(roi_ratio, 0.55))))
-            bot = image[max(0, h - cut) :, :].copy()
-            if bot.size == 0:
-                return []
-            variants: List[np.ndarray] = [bot]
-            try:
-                gray = cv2.cvtColor(bot, cv2.COLOR_RGB2GRAY)
-                # high-contrast + invert catch faint pencil totals
-                _, th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-                variants.append(cv2.cvtColor(th, cv2.COLOR_GRAY2RGB))
-                variants.append(cv2.cvtColor(cv2.bitwise_not(th), cv2.COLOR_GRAY2RGB))
-            except Exception:
-                pass
-
-            out: List[str] = []
-            seen: set[str] = set()
-            for var_img in variants[:3]:
-                # totals are numeric-heavy: prefer paddle first
-                blocks = self.run_paddle(var_img) + self.run_easyocr(var_img)
-                for b in blocks:
-                    t = normalize_ocr_text(str(b.get("text", "")).strip())
-                    k = norm(t)
-                    if len(k) >= 2 and k not in seen:
-                        out.append(t)
-                        seen.add(k)
-            return out
-        except Exception as exc:
-            logger.warning("Bottom ROI total micro-pass failed: %s", exc)
-            return []
-
-
-# ============================================================================
-# UTILITY FUNCTIONS (v5.1 - Geometry & Context-Aware Parsing)
-# ============================================================================
-
-def norm(s: str) -> str:
-    """Lowercase collapsed key for dedupe / dict keys."""
-    return re.sub(r"\s+", " ", (s or "").strip().lower())
+                log.info("EasyOCR ready.")
+    return _reader
+
+# ---------------------------------------------------------------------------
+# Image preprocessing helpers
+# ---------------------------------------------------------------------------
+
+def _pil_to_bgr(img: Image.Image) -> np.ndarray:
+    return cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2BGR)
+
+
+def _auto_rotate(bgr: np.ndarray) -> np.ndarray:
+    """Deskew via Hough lines – very cheap on CPU."""
+    gray  = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
+    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
+    lines = cv2.HoughLinesP(edges, 1, math.pi / 180, 80, minLineLength=60, maxLineGap=10)
+    if lines is None:
+        return bgr
+    angles = []
+    for x1, y1, x2, y2 in lines[:, 0]:
+        if x2 != x1:
+            angles.append(math.degrees(math.atan2(y2 - y1, x2 - x1)))
+    if not angles:
+        return bgr
+    median_angle = float(np.median(angles))
+    if abs(median_angle) < 0.5 or abs(median_angle) > 45:
+        return bgr
+    h, w = bgr.shape[:2]
+    M = cv2.getRotationMatrix2D((w / 2, h / 2), median_angle, 1.0)
+    return cv2.warpAffine(bgr, M, (w, h), flags=cv2.INTER_CUBIC,
+                          borderMode=cv2.BORDER_REPLICATE)
+
+
+def _clahe_sharpen(bgr: np.ndarray) -> np.ndarray:
+    """CLAHE → denoise → sharpen."""
+    lab   = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
+    l, a, b = cv2.split(lab)
+    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
+    l     = clahe.apply(l)
+    lab   = cv2.merge([l, a, b])
+    bgr   = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
+    bgr   = cv2.fastNlMeansDenoisingColored(bgr, None, 7, 7, 7, 21)
+    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], dtype=np.float32)
+    return cv2.filter2D(bgr, -1, kernel)
+
+
+def _adaptive_thresh(bgr: np.ndarray) -> np.ndarray:
+    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
+    return cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                 cv2.THRESH_BINARY, 31, 10)
+
+
+def build_variants(bgr: np.ndarray) -> list[np.ndarray]:
+    """Return list of OCR-ready image variants."""
+    enhanced = _clahe_sharpen(bgr)
+    variants = [
+        enhanced,                                                   # enhanced colour
+        cv2.bitwise_not(enhanced),                                  # inverted
+        _adaptive_thresh(enhanced),                                 # binary
+        cv2.convertScaleAbs(enhanced, alpha=1.5, beta=20),         # high-contrast
+    ]
+    return variants
 
+# ---------------------------------------------------------------------------
+# OCR helpers
+# ---------------------------------------------------------------------------
 
-def sliding_split_multi_price_line(line: str) -> List[str]:
-    """
-    Split a single OCR line that glued multiple rows (e.g. 'cheeni 2 200 atta 1 500').
-    Handles [Name] [sep] [Qty] [sep] [Price] with - * : etc.
-    """
-    line = normalize_ocr_text(line)
-    if not line.strip():
+def _run_ocr(reader: easyocr.Reader, img: np.ndarray) -> list[dict]:
+    """Run EasyOCR on one image variant; return list of result dicts."""
+    try:
+        raw = reader.readtext(
+            img,
+            text_threshold=0.15,
+            low_text=0.10,
+            link_threshold=0.25,
+            mag_ratio=1.5,
+            slope_ths=0.3,
+            ycenter_ths=0.8,
+            height_ths=0.7,
+            width_ths=0.9,
+            decoder="greedy",
+            beamWidth=3,
+        )
+    except Exception as exc:
+        log.warning("OCR variant failed: %s", exc)
         return []
-    out: List[str] = []
-    # Primary: repeated Name sep Qty sep Price
-    rx_row = re.compile(
-        r"(?i)([a-zA-Zء-ي][a-zA-Zء-ي\s]{0,42}?)\s*[\-\*–—:#]+\s*(\d+(?:\.\d+)?)\s*[\-\*–—:#]+\s*(\d+(?:\.\d+)?)"
-    )
-    pos = 0
-    for m in rx_row.finditer(line):
-        if m.start() > pos:
-            mid = line[pos : m.start()].strip(" -*,:")
-            if mid and re.search(r"[A-Za-zء-ي]", mid):
-                out.append(mid)
-        out.append(m.group(0).strip())
-        pos = m.end()
-    if pos < len(line):
-        tail = line[pos:].strip(" -*,:")
-        if tail and re.search(r"[A-Za-zء-ي\d]", tail):
-            out.append(tail)
-    if out:
-        return [x for x in out if x.strip()]
-
-    # Sliding fallback: pair consecutive numbers as qty+price with preceding text
-    nums = list(re.finditer(r"(?<!\d)(\d{1,5}(?:\.\d{1,2})?)(?!\d)", line))
-    if len(nums) < 2:
-        return [line]
-    chunks: List[str] = []
-    last = 0
-    i = 0
-    while i + 1 < len(nums):
-        a, b = nums[i], nums[i + 1]
-        name_seg = line[last : a.start()].strip(" -*,:")
-        if name_seg and re.search(r"[A-Za-zء-ي]", name_seg):
-            chunks.append(f"{name_seg} {a.group(1)} {b.group(1)}".strip())
-            last = b.end()
-            i += 2
-        else:
-            i += 1
-    if last < len(line):
-        tail = line[last:].strip()
-        if tail:
-            chunks.append(tail)
-    return chunks if len(chunks) >= 2 else [line]
+    results = []
+    for (bbox, text, conf) in raw:
+        if not text.strip():
+            continue
+        xs = [p[0] for p in bbox]
+        ys = [p[1] for p in bbox]
+        results.append({
+            "text": text.strip(),
+            "conf": float(conf),
+            "x1": min(xs), "y1": min(ys),
+            "x2": max(xs), "y2": max(ys),
+            "yc": (min(ys) + max(ys)) / 2,
+        })
+    return results
 
 
-def normalize_ocr_text(text: str) -> str:
+def merge_ocr_results(all_results: list[list[dict]]) -> list[dict]:
     """
-    Context-aware cleaning for Pakistani retail OCR.
-    Handles common Urdu-English mixing and noise.
+    Merge results from multiple variants.
+    For overlapping detections keep highest-confidence.
     """
-    # Replace common Urdu lookalikes of numbers
-    text = text.replace('۰', '0').replace('۱', '1').replace('۲', '2')
-    text = text.replace('۳', '3').replace('۴', '4').replace('۵', '5')
-    text = text.replace('۶', '6').replace('۷', '7').replace('۸', '8')
-    text = text.replace('۹', '9')
-    
-    # Normalize common dashes
-    text = re.sub(r'[–—−]', '-', text)
-    
-    # Fix 'item-quantity price' pattern (e.g., 'Atta-2 500')
-    text = re.sub(r'([a-zA-Zء-ي]+)-(\d+)', r'\1 \2', text)
-    
-    # Normalize multiple spaces
-    text = re.sub(r'\s+', ' ', text).strip()
-    
+    if not all_results:
+        return []
+    merged: list[dict] = []
+    for variant_res in all_results:
+        for det in variant_res:
+            # Check overlap with any already-merged detection
+            duplicate = False
+            for existing in merged:
+                iou = _bbox_iou(det, existing)
+                if iou > 0.40:
+                    if det["conf"] > existing["conf"]:
+                        existing.update(det)
+                    duplicate = True
+                    break
+            if not duplicate:
+                merged.append(det.copy())
+    return merged
+
+
+def _bbox_iou(a: dict, b: dict) -> float:
+    ix1 = max(a["x1"], b["x1"])
+    iy1 = max(a["y1"], b["y1"])
+    ix2 = min(a["x2"], b["x2"])
+    iy2 = min(a["y2"], b["y2"])
+    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
+    if inter == 0:
+        return 0.0
+    ua = (a["x2"] - a["x1"]) * (a["y2"] - a["y1"])
+    ub = (b["x2"] - b["x1"]) * (b["y2"] - b["y1"])
+    return inter / (ua + ub - inter + 1e-9)
+
+# ---------------------------------------------------------------------------
+# Text normalisation & number extraction
+# ---------------------------------------------------------------------------
+
+def normalise_text(text: str) -> str:
+    """Translate Urdu digits, common OCR confusions, clean whitespace."""
+    text = text.translate(URDU_DIGITS)
+    text = text.replace("O", "0").replace("o", "0").replace("l", "1") \
+               .replace("I", "1").replace("S", "5").replace("Z", "2") \
+               .replace("B", "8").replace("G", "9").replace("g", "9")
+    text = re.sub(r"\s+", " ", text).strip()
     return text
 
 
-def extract_numbers(text: str) -> List[float]:
-    """
-    Extract all numbers from text, handling decimals and common formats.
-    """
-    numbers = []
-    pattern = r'\b(\d{1,5}(?:\.\d{1,2})?|\d{1,3}(?:,\d{3})*(?:\.\d{1,2})?)'  # Handles 100, 1.5, 1,000.50
-    for match in re.finditer(pattern, text):
+# A digit run that may contain ',', '.' or '/' between its first and last
+# digit, or a single lone digit.
+_NUM_RE = re.compile(r"(\d[\d,./]*\d|\d)")
+
+def extract_numbers(text: str) -> list[float]:
+    """Return every numeric token in *text* that parses as a float.
+
+    Commas are stripped before conversion, so "1,000.50" yields 1000.5.
+    Tokens the pattern accepts but ``float`` rejects (for example "12/05"
+    or "1.2.3") are skipped. Values are returned in order of appearance.
+    """
+    nums = []
+    for m in _NUM_RE.finditer(text):
+        raw = m.group().replace(",", "")
         try:
-            num = float(match.group(1).replace(',', ''))
-            if 0 < num < 999999:  # Reasonable range
-                numbers.append(num)
+            nums.append(float(raw))
         except ValueError:
+            # Best-effort: a matched token that is not a valid float is dropped.
             pass
-    return numbers
-
+    return nums
 
-def parse_geometry_line(line: str) -> Optional[Dict[str, Any]]:
-    """
-    Universal line parser (Geometry-Based).
-    Parses: [Text/Mixed] [Separator] [Numbers]
-    Separators: -, :, multiple spaces
-    
-    Returns: {text, qty, price, confidence}
-    """
-    line = normalize_ocr_text(line)
-    
-    # Try pattern: "item - qty - price", "item * qty * price", "item: qty price", etc.
-    patterns = [
-        r"^([a-zA-Zء-ي\s]+?)\s*[\-\*–—:]+\s*(\d+(?:\.\d+)?)\s*[\-\*–—:]+\s*(\d+(?:\.\d+)?)",  # item sep qty sep price
-        r"^([a-zA-Zء-ي\s]+?)\s*[-:]\s*(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)",  # item-qty price (legacy)
-        r"^([a-zA-Zء-ي\s]+?)\s+(\d+(?:\.\d+)?)\s*kg\s+(\d+(?:\.\d+)?)",  # item qty kg price
-        r"^([a-zA-Zء-ي\s]+?)\s+(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)",  # item qty price (generic)
-    ]
-    
-    for pattern in patterns:
-        match = re.match(pattern, line, re.IGNORECASE)
-        if match:
-            item_text = match.group(1).strip()
-            try:
-                if len(match.groups()) == 3:
-                    qty = float(match.group(2))
-                    price = float(match.group(3))
-                    return {
-                        "text": item_text,
-                        "quantity": qty,
-                        "price": price,
-                        "confidence": 0.72,
-                        "source": "geometry"
-                    }
-            except (ValueError, IndexError):
-                pass
-    
-    return None
 
-
-def subset_sum_match(numbers: List[float], target: float, tolerance: float = 2.0) -> Tuple[List[float], bool]:
-    """
-    Find subset of numbers that sum to target (for total anchoring).
-    Uses greedy + exhaustive search for small lists.
-    
-    Returns: (matching_numbers, is_exact_match)
-    """
-    if not numbers:
-        return [], False
-    
-    # Sort descending for greedy approach
-    sorted_nums = sorted(numbers, reverse=True)
-    current_sum = 0
-    matched = []
-    
-    # Greedy approach first
-    for num in sorted_nums:
-        if abs(current_sum + num - target) <= abs(current_sum - target):
-            current_sum += num
-            matched.append(num)
-            if abs(current_sum - target) <= tolerance:
-                return matched, abs(current_sum - target) < 0.01
-    
-    # If greedy fails, try combinations (for small lists)
-    if len(numbers) <= 10:
-        for r in range(len(numbers), 0, -1):
-            for combo in itertools.combinations(numbers, r):
-                if abs(sum(combo) - target) <= tolerance:
-                    return list(combo), abs(sum(combo) - target) < 0.01
-
-    return [], False
-
-
-# ============================================================================
-# v5.2: ENHANCED PARSING & CONFIDENCE RECONSTRUCTION
-# ============================================================================
-
-def parse_aggressive_patterns(line: str) -> Optional[Dict[str, Any]]:
+def merge_spaced_digits(tokens: list[str]) -> list[str]:
     """
-    v5.2: Aggressive pattern matching for messy/poor quality images.
-    Supports more flexible formats and Urdu variants.
-    EXTREMELY LENIENT - if line has text + numbers, treat as potential item.
+    Join runs of adjacent single-digit tokens into one token.
+
+    ['3', '0', '0'] → ['300']
+    ['2', '1'] → ['21']
+    Single digit tokens adjacent to other single digit tokens get merged.
+
+    A single-digit token with no single-digit neighbour, and every other
+    token, is copied to the result unchanged. The input list is not
+    modified; a new list is returned.
     """
-    line = normalize_ocr_text(line)
-    if len(line.strip()) < 2:
-        return None
-    
-    # v5.2: ULTRA-LENIENT patterns - match almost anything with numbers
-    # These patterns are EXTREMELY flexible to catch all variations
-    patterns = [
-        # Pattern 1: "item - qty - price" / "item * qty * price"
-        r'([a-zA-Zء-ي\s]+?)\s*[\-\*–—:]+\s*(\d+(?:\.\d+)?)\s*[\-\*–—:]+\s*(\d+(?:\.\d+)?)',
-        # Pattern 1b: "item - qty price" or "item - price"
-        r'([a-zA-Zء-ي\s]+?)\s*[-:–—]\s*(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)',
-        # Pattern 2: "item qty unit price" (with optional units)
-        r'([a-zA-Zء-ي\s]+?)\s+(\d+(?:\.\d+)?)\s*(?:kg|g|liter|litre|ml|dozen|dz|pcs|pc|pkt|packet)?\s*(\d+(?:\.\d+)?)',
-        # Pattern 3: "item price" (qty=1 assumed) - VERY LENIENT
-        r'([a-zA-Zء-ي\s]{2,})\s+(\d+(?:\.\d+)?)',
-        # Pattern 4: "item qty: price" or similar
-        r'([a-zA-Zء-ي\s]+?)\s+(\d+(?:\.\d+)?)\s*[:–—]\s*(\d+(?:\.\d+)?)',
-        # Pattern 5: "item qty-price" (hyphenated with floats)
-        r'([a-zA-Zء-ي\s]+?)\s+(\d+(?:\.\d+)?)[-–]\s*(\d+(?:\.\d+)?)',
-    ]
-    
-    for pattern_idx, pattern in enumerate(patterns):
-        # Use search instead of match to find pattern anywhere in line
-        match = re.search(pattern, line, re.IGNORECASE)
-        if match:
-            item_text = match.group(1).strip()
-            if not item_text or len(item_text) < 2:
-                continue
-                
-            try:
-                groups = match.groups()
-                if len(groups) >= 2:
-                    if len(groups) == 2:
-                        price = float(groups[1])
-                        qty = 1.0
-                    else:
-                        qty_str = str(groups[1]).replace("۲", "2").replace("۱", "1").replace("۳", "3").replace("۴", "4").replace("۵", "5")
-                        qty = float(qty_str)
-                        price = float(groups[2])
-                    
-                    # LENIENT validation - allow wider range
-                    if 0.1 <= qty <= 200 and 1 <= price <= 99999:
-                        return {
-                            "text": item_text,
-                            "quantity": qty,
-                            "price": price,
-                            "confidence": 0.60 + (0.08 if pattern_idx < 2 else 0.02),
-                            "source": f"aggressive_p{pattern_idx+1}"
-                        }
-            except (ValueError, IndexError) as e:
+    out: list[str] = []
+    i = 0
+    while i < len(tokens):
+        tok = tokens[i]
+        # A token qualifies only if it is exactly one digit character
+        # (for str patterns, \d also matches non-ASCII decimal digits).
+        if re.fullmatch(r"\d", tok):
+            # collect consecutive single digits
+            group = [tok]
+            j = i + 1
+            while j < len(tokens) and re.fullmatch(r"\d", tokens[j]):
+                group.append(tokens[j])
+                j += 1
+            # Only a run of two or more digits is merged; a lone digit
+            # falls through to the plain append below.
+            if len(group) > 1:
+                out.append("".join(group))
+                # Resume scanning at the first token after the merged run.
+                i = j
                 continue
-    
-    return None
-
-
-def reconstruct_items_from_total(total: float, possible_prices: List[float]) -> List[Tuple[float, float]]:
-    """
-    v5.2: Reconstruct likely items from reported total.
-    Used when item extraction fails but total is present.
-    Returns: [(qty, price), ...]
-    """
-    if not possible_prices or total <= 0:
+        # Not part of a multi-digit run: keep the token as-is.
+        out.append(tok)
+        i += 1
+    return out
+
+# ---------------------------------------------------------------------------
+# Lexicon correction (fuzzy, conservative)
+# ---------------------------------------------------------------------------
+
+def lexicon_correct(word: str) -> str:
+    """Correct an item word via exact lookup, then a guarded fuzzy match.
+
+    Words shorter than two characters and all-digit words are returned
+    untouched. Otherwise the lower-cased, stripped word is looked up in
+    ``ITEM_CORRECTIONS``; failing that, the best ``LEXICON`` candidate with
+    a score of at least 85 is accepted only when its length differs from
+    the word's by at most 3. When nothing qualifies, the lower-cased,
+    stripped word itself is returned.
+    """
+    if word.isdigit() or len(word) < 2:
+        return word
+    key = word.lower().strip()
+    # First tier: exact dictionary hit.
+    if key in ITEM_CORRECTIONS:
+        return ITEM_CORRECTIONS[key]
+    # Second tier: fuzzy candidate, rejected if the length gap is too large.
+    candidate = rfprocess.extractOne(key, LEXICON, score_cutoff=85)
+    if candidate is not None:
+        suggestion, _score, _ = candidate
+        if abs(len(suggestion) - len(key)) <= 3:
+            return suggestion
+    return key
+
+# ---------------------------------------------------------------------------
+# Geometry: group detections into lines
+# ---------------------------------------------------------------------------
+
+def group_into_lines(dets: list[dict], gap_factor: float = 0.6) -> list[list[dict]]:
+    """
+    Cluster detections by Y-centre into horizontal lines.
+
+    Each detection is read through its ``y1``, ``y2``, ``yc`` and ``x1``
+    keys. Detections are visited in ascending ``yc`` order; one joins the
+    current line while its ``yc`` lies within the threshold of the line's
+    first detection, otherwise it starts a new line.
+
+    gap_factor: fraction of median text height that counts as new line.
+
+    Returns the lines in ascending ``yc`` order, each sorted by ``x1``;
+    an empty input gives an empty list.
+    """
+    if not dets:
         return []
-    
-    # Find combination of prices that sum to total
-    reconstructed = []
-    remaining = total
-    prices_copy = sorted(possible_prices, reverse=True)
-    
-    for price in prices_copy:
-        if remaining >= price and price >= 1:
-            # Estimate qty
-            qty = max(1.0, round(remaining / price))
-            if qty <= 20:  # Reasonable qty
-                reconstructed.append((qty, price))
-                remaining -= qty * price
-                if abs(remaining) < 2:  # Close enough
-                    break
-    
-    return reconstructed
-
+    # Only boxes with positive height contribute to the median; if there are
+    # none, a height of 20.0 is used instead.
+    heights = [d["y2"] - d["y1"] for d in dets if d["y2"] > d["y1"]]
+    median_h = float(np.median(heights)) if heights else 20.0
+    threshold = median_h * gap_factor
+
+    sorted_dets = sorted(dets, key=lambda d: d["yc"])
+    lines: list[list[dict]] = []
+    current_line: list[dict] = [sorted_dets[0]]
+    # The reference centre is that of the line's first detection; it is not
+    # updated as further detections join the line.
+    ref_yc = sorted_dets[0]["yc"]
+
+    for det in sorted_dets[1:]:
+        if abs(det["yc"] - ref_yc) <= threshold:
+            current_line.append(det)
+        else:
+            # Close the finished line (ordered by x1) and start a new one.
+            lines.append(sorted(current_line, key=lambda d: d["x1"]))
+            current_line = [det]
+            ref_yc = det["yc"]
+    # Flush the last line still being built.
+    lines.append(sorted(current_line, key=lambda d: d["x1"]))
+    return lines
 
-def boost_confidence(base_confidence: float, item_name: str, price: float, 
-                    in_lexicon: bool, math_validated: bool, sharpness: float) -> float:
-    """
-    v5.2: Intelligent confidence boosting based on multiple factors.
-    """
-    boosted = base_confidence
-    
-    # Boost if in lexicon
-    if in_lexicon:
-        boosted += SystemConfig.CONF_BOOST_LEXICON
-    
-    # Boost if matches known patterns (common items)
-    common_items = ['atta', 'chai', 'milk', 'doodh', 'chay', 'roghan', 'namak', 'chini']
-    if item_name.lower() in common_items:
-        boosted += SystemConfig.CONF_BOOST_PATTERN
-    
-    # Boost if math validates
-    if math_validated:
-        boosted += SystemConfig.CONF_BOOST_MATH
-    
-    # Quality-based adjustment
-    if sharpness > SystemConfig.QUALITY_SHARPNESS_EXCELLENT:
-        boosted += 0.05  # High quality boost
-    elif sharpness < SystemConfig.QUALITY_SHARPNESS_POOR:
-        boosted *= 0.95  # Low quality penalty
-    
-    return min(0.98, max(0.35, boosted))  # Clamp between 0.35-0.98
-
-
-# ============================================================================
-# URDU TEXT NORMALIZER
-# ============================================================================
-
-class UrduTextNormalizer:
-    """
-    Specialized normalizer for Urdu text handling.
-    Handles Nastaliq script complexities, diacritics, and Roman Urdu.
-    """
-    
-    # Roman Urdu to Urdu mapping (common patterns)
-    ROMAN_TO_URDU = {
-        'a': 'ا', 'b': 'ب', 'p': 'پ', 't': 'ت', 's': 'س',
-        'j': 'ج', 'ch': 'چ', 'h': 'ح', 'kh': 'خ', 'd': 'د',
-        'z': 'ز', 'r': 'ر', 'sh': 'ش', 'gh': 'غ', 'f': 'ف',
-        'q': 'ق', 'k': 'ک', 'g': 'گ', 'l': 'ل', 'm': 'م',
-        'n': 'ن', 'w': 'و', 'y': 'ی', 'e': 'ے'
-    }
-    
-    # Common normalization rules
-    NORMALIZATION_RULES = {
-        'ي': 'ی',  # Different Yeh forms
-        'ى': 'ی',
-        'ة': 'ہ',  # Ta Marbuta to Heh
-        'ھ': 'ہ',  # Heh doachashmee to Heh
-        'ك': 'ک',  # Arabic Kaf to Urdu Keh
-    }
-    
-    @classmethod
-    def normalize_urdu_text(cls, text: str) -> str:
-        """Normalize Urdu text: unify similar characters"""
-        for old, new in cls.NORMALIZATION_RULES.items():
-            text = text.replace(old, new)
-        return text
-    
-    @classmethod
-    def roman_to_urdu(cls, text: str) -> str:
-        """Convert Roman Urdu to proper Urdu script"""
-        # Only apply if text contains mostly Roman characters
-        roman_ratio = sum(c.isascii() for c in text) / max(1, len(text))
-        if roman_ratio < 0.5:
-            return text
-            
-        result = []
-        i = 0
-        while i < len(text):
-            matched = False
-            # Try longest matches first
-            for length in range(3, 0, -1):
-                if i + length <= len(text):
-                    sub = text[i:i+length].lower()
-                    if sub in cls.ROMAN_TO_URDU:
-                        result.append(cls.ROMAN_TO_URDU[sub])
-                        i += length
-                        matched = True
-                        break
-            if not matched:
-                result.append(text[i])
-                i += 1
-        return ''.join(result)
-    
-    @classmethod
-    def is_urdu(cls, text: str) -> bool:
-        """Detect if text contains Urdu characters"""
-        urdu_range = range(0x0600, 0x06FF)
-        return any(ord(c) in urdu_range for c in text)
-    
-    @classmethod
-    def clean_text(cls, text: str) -> str:
-        """Clean text: remove noise, normalize spaces"""
-        # Remove special characters (keep Urdu, English, numbers, spaces)
-        text = re.sub(r'[^\w\sء-ي]', ' ', text)
-        # Normalize spaces
-        text = re.sub(r'\s+', ' ', text)
-        return text.strip().lower()
-
-
-# ============================================================================
-# SEMANTIC LEXICON (Pass 3 - Complete Pakistani Market Lexicon)
-# ============================================================================
-
-class PakistaniRetailLexicon:
-    """
-    Complete semantic lexicon for Pakistani retail items.
-    Includes Urdu names, Roman Urdu variations, English names, and common misspellings.
-    """
-    
-    # Master lexicon with categories
-    LEXICON = {
-        # Staples (اناج)
-        'atta': ['atta', 'aata', 'arta', 'flour', 'wheat flour', 'chakki atta', 'آٹا'],
-        'cheeni': ['cheeni', 'chini', 'sugar', 'sugar s', 'white sugar', 'چینی'],
-        'chawal': ['chawal', 'rice', 'basmati', 'sella rice', 'tota chawal', 'چاول'],
-        'daal': ['daal', 'dal', 'lentils', 'daal mash', 'daal chana', 'daal moong', 'دال'],
-        'besan': ['besan', 'gram flour', 'chana flour', 'بیسن'],
-        
-        # Fats & Oils (چکنائی)
-        'ghee': ['ghee', 'desi ghee', 'گھی'],
-        'tel': ['tel', 'oil', 'cooking oil', 'تیل'],
-        'dalda': ['dalda', 'banaspati', 'vegetable ghee', 'ڈالڈا'],
-        
-        # Dairy (دودھ کی مصنوعات)
-        'doodh': ['doodh', 'milk', 'olpers', 'milkpak', 'دودھ'],
-        'dahi': ['dahi', 'yogurt', 'curd', 'دہی'],
-        'paneer': ['paneer', 'cottage cheese', 'پنیر'],
-        
-        # Spices (مصالحے)
-        'haldi': ['haldi', 'turmeric', 'haldi powder', 'ہلدی'],
-        'zeera': ['zeera', 'cumin', 'safaid zeera', 'زیرہ'],
-        'dhania': ['dhania', 'coriander', 'دھنیا'],
-        'mirch': ['mirch', 'red chili', 'lal mirch', 'مرچ'],
-        'namak': ['namak', 'salt', 'shan namak', 'نمک'],
-        'garam_masala': ['garam masala', 'all spice', 'گرم مصالحہ'],
-        
-        # Tea & Beverages (مشروبات)
-        'chai': ['chai', 'tea', 'patti', 'tapal', 'lipton', 'چائے'],
-        'coffee': ['coffee', 'cafe', 'کافی'],
-        'soda': ['soda', 'soft drink', 'coke', 'pepsi', 'soda water'],
-        
-        # Personal Care (ذاتی نگہداشت)
-        'sabun': ['sabun', 'soap', 'lux', 'safeguard', 'dettol', 'lifebuoy', 'صابن'],
-        'shampoo': ['shampoo', 'شیمپو'],
-        'cream': ['cream', 'moisturizer', 'fairness cream', 'کریم'],
-        'toothpaste': ['toothpaste', 'paste', 'dental cream', 'ٹوتھ پیسٹ'],
-        'razor': ['razor', 'blade', 'gillete', 'ریزر'],
-        
-        # Household (گھریلو اشیاء)
-        'hammam': ['hammam', 'hamaam', 'bath soap', 'حمام'],
-        'detergent': ['detergent', 'washing powder', 'soap powder', 'detergent powder', 'ariel', 'surf excel'],
-        'bleach': ['bleach', 'whitener', 'بلیچ'],
-        
-        # Snacks & Packaged (نمکین اشیاء)
-        'burger': ['burger', 'bugger', 'برگر'],
-        'biscuit': ['biscuit', 'cookie', 'bakery', 'bisconni', 'peak freans', 'بسکٹ'],
-        'chips': ['chips', 'crisps', 'lays', 'kurleez', 'چپس'],
-        'bread': ['bread', 'double roti', 'بریڈ'],
-        
-        # Eggs & Meat (انڈے اور گوشت)
-        'anday': ['anday', 'eggs', 'desi anday', 'anda', 'انڈے'],
-        'chicken': ['chicken', 'murghi', 'broiler', 'چکن'],
-        'beef': ['beef', 'gai ka gosht', 'بیف'],
-        'mutton': ['mutton', 'bakray ka gosht', 'مٹن'],
-        
-        # Vegetables & Fruits (سبزیاں اور پھل)
-        'aaloo': ['aaloo', 'potato', 'آلو'],
-        'pyaaz': ['pyaaz', 'onion', 'پیاز'],
-        'tamatar': ['tamatar', 'tomato', 'ٹماٹر'],
-        'kheera': ['kheera', 'cucumber', 'کھیرا'],
-        'apple': ['apple', 'saib', 'سیب'],
-        'banana': ['banana', 'kela', 'کیلا'],
-        
-        # Other Common Terms
-        'total': ['total', 'tota', 'ٹوٹل', 'کل', 'مجموعی', 'total amount', 'grand total'],
-        'udhaar': ['udhaar', 'udhar', 'u dhara', 'ادھار', 'باقی', 'بقایا'],
-        'wasooli': ['wasooli', 'wasuli', 'وصولی', 'وصول', 'جمع'],
-        'cash': ['cash', 'نقد', 'paid', 'ادا', 'cash paid'],
-        'date': ['date', 'تاریخ'],
-        'name': ['name', 'customer', 'client', 'نام', 'گاہک', 'بندہ', 'جناب'],
-    }
-    
-    # Unit mappings
-    UNITS = {
-        'kg': ['kg', 'kgs', 'kilo', 'kilogram', 'کلو', 'کلوگرام'],
-        'g': ['g', 'gm', 'gram', 'گرام'],
-        'liter': ['liter', 'litre', 'ltr', 'l', 'لیٹر'],
-        'ml': ['ml', 'milliliter', 'ملی لیٹر'],
-        'dozen': ['dozen', 'dz', 'ڈزن'],
-        'pc': ['pc', 'pcs', 'piece', 'pieces', 'عدد'],
-        'packet': ['packet', 'pkt', 'pack', 'پیکٹ'],
-    }
-    
-    @classmethod
-    def normalize_item_name(cls, name: str) -> Tuple[str, float]:
-        """
-        Normalize item name using semantic lexicon.
-        Returns (normalized_name, confidence_score)
-        """
-        name_clean = name.lower().strip()
-        
-        # Direct match
-        for standard, variants in cls.LEXICON.items():
-            if name_clean in variants:
-                return standard, 0.95
-                
-        # Fuzzy match using rapidfuzz
-        all_variants = {}
-        for standard, variants in cls.LEXICON.items():
-            for variant in variants:
-                all_variants[variant] = standard
-        
-        if all_variants:
-            try:
-                hit = fuzzy_process.extractOne(name_clean, all_variants.keys())
-                if hit:
-                    # rapidfuzz returns (choice, score, index) in newer versions.
-                    best_match = hit[0]
-                    score = float(hit[1]) / 100.0
-                    if score > 0.65:
-                        return all_variants[best_match], score
-            except Exception as exc:
-                logger.warning("Fuzzy item match skipped: %s", exc)
-        
-        return name, 0.5
-    
-    @classmethod
-    def normalize_unit(cls, text: str) -> str:
-        """Extract and normalize unit from text"""
-        text_lower = text.lower()
-        for unit, patterns in cls.UNITS.items():
-            for pattern in patterns:
-                if pattern in text_lower:
-                    return unit
-        return "pc"  # default unit
-    
-    @classmethod
-    def detect_transaction_type(cls, text: str) -> Tuple[str, float]:
-        """Detect transaction type (udhaar/wasooli/cash)"""
-        text_lower = text.lower()
-        
-        # Check udhaar
-        if 'udhaar' in cls.LEXICON:
-            for variant in cls.LEXICON['udhaar']:
-                if variant in text_lower:
-                    return "udhaar", 0.95
-        
-        # Check wasooli
-        if 'wasooli' in cls.LEXICON:
-            for variant in cls.LEXICON['wasooli']:
-                if variant in text_lower:
-                    return "wasooli", 0.95
-                    
-        # Check cash
-        if 'cash' in cls.LEXICON:
-            for variant in cls.LEXICON['cash']:
-                if variant in text_lower:
-                    return "cash", 0.85
-                    
-        return "unknown", 0.4
-
-
-# ============================================================================
-# INTELLIGENT PARSER (Extracts structured data from OCR lines)
-# ============================================================================
-
-class IntelligentParser:
-    """
-    Parses OCR text into structured items, total, and customer name.
-    Uses multiple strategies:
-    1. Pattern matching (regex)
-    2. Position-based inference
-    3. Semantic mapping
-    4. Mathematical validation
-    """
-    
-    def __init__(self, config: SystemConfig):
-        self.config = config
-
-    @staticmethod
-    def _name_blacklist() -> set[str]:
-        return {
-            "wasooli",
-            "wasuli",
-            "وصولی",
-            "وصول",
-            "udhaar",
-            "udhar",
-            "ادھار",
-            "baqaya",
-            "baki",
-            "بقایا",
-            "total",
-            "tota",
-            "ٹوٹل",
-            "کل",
-            "cash",
-            "discount",
-            "disc",
-            "ڈسکاؤنٹ",
-            "name",
-            "customer",
-            "receipt",
-        }
-
-    @staticmethod
-    def _name_headers() -> Tuple[str, ...]:
-        return ("name", "customer", "mr", "جناب", "نام", "mohtaram", "محترم")
-
-    def _clean_name_candidate(self, text: str) -> str:
-        t = normalize_ocr_text(text or "")
-        t = re.sub(r"[^\w\sء-ي]", " ", t)
-        t = re.sub(r"\s+", " ", t).strip()
-        return t
-
-    def _normalize_person_name(self, text: str) -> str:
-        """Normalize likely person names for Urdu/Roman Urdu slips."""
-        t = self._clean_name_candidate(text)
-        if not t:
-            return ""
-        # Remove honorific prefixes and common OCR junk tokens around names.
-        t = re.sub(r"(?i)\b(mr|mrs|ms|جناب|محترم|name|customer)\b[:\-]?\s*", "", t).strip()
-        # Common OCR confusions in Roman names.
-        replacements = {
-            "0": "o",
-            "1": "l",
-            "5": "s",
-            "  ": " ",
-        }
-        for src, dst in replacements.items():
-            t = t.replace(src, dst)
-        # Remove trailing ledger words if attached with the name.
-        t = re.sub(r"(?i)\b(total|wasooli|udhaar|baqaya|cash|discount)\b.*$", "", t).strip()
-        # High-precision roman-name OCR glitch fixes (avoid broad hallucination rules).
-        if t.strip().lower() == "unmar":
-            t = "Umar"
-        t = re.sub(r"\s+", " ", t).strip()
-        return t
-
-    @staticmethod
-    def _compute_line_total_price(
-        original_line: str,
-        quantity: float,
-        extracted_price: float,
-    ) -> Tuple[float, Optional[float], List[str]]:
-        """
-        Shopkeeper Rule:
-        - The number at the end of the item line is usually the **LINE TOTAL** (amount for that row).
-          e.g. "Milk 3 1200" => qty=3, line_total=1200 (UI should show 1200).
-        - Only if unit price is explicitly mentioned (e.g. "@ 400", "per 400", "/400") do we treat
-          extracted_price as unit price and compute line_total = qty * unit_price.
-
-        Returns: (line_total_price, unit_price_or_none, notes)
-        """
-        notes: List[str] = []
-        try:
-            q = float(quantity)
-            p = float(extracted_price)
-        except Exception:
-            return extracted_price, None, notes
-
-        if q <= 0 or p <= 0:
-            return extracted_price, None, notes
-
-        line = (original_line or "").lower()
-        unit_markers = ("@", " per ", "per-", "per/", "/")
-        explicit_unit = any(m in line for m in unit_markers)
-        if explicit_unit and q > 0:
-            line_total = float(round(q * p, 2))
-            notes.append(f"unit_price_detected_unit_{round(p,2)}_qty_{round(q,3)}_line_{round(line_total,2)}")
-            return line_total, p, notes
-
-        # Default: treat extracted number as line total.
-        return p, None, notes
-
-    def _valid_name_candidate(self, text: str) -> bool:
-        t = self._clean_name_candidate(text)
-        if len(t) < 2:
-            return False
-        low = t.lower()
-        if low in self._name_blacklist():
-            return False
-        if any(kw in low for kw in ("total", "wasool", "udhaar", "baq", "cash")):
-            return False
-        if sum(c.isalpha() for c in t) < 2:
-            return False
-        if sum(c.isdigit() for c in t) > 2:
-            return False
-        return True
-
-    @staticmethod
-    def _bbox_center(block: OCRTextBlock) -> Tuple[float, float]:
-        bb = block["bbox"]
-        cx = (float(bb[0][0]) + float(bb[2][0])) / 2.0
-        cy = (float(bb[0][1]) + float(bb[2][1])) / 2.0
-        return cx, cy
-
-    @staticmethod
-    def _bbox_yxxy(block: OCRTextBlock) -> List[float]:
-        """Return bbox as [y1, x1, y2, x2] for UI overlay payload."""
-        bb = block["bbox"]
-        xs = [float(p[0]) for p in bb]
-        ys = [float(p[1]) for p in bb]
-        return [min(ys), min(xs), max(ys), max(xs)]
-
-    def parse_customer_name(
-        self,
-        lines: List[str],
-        ocr_blocks: Optional[List[OCRTextBlock]] = None,
-        image_shape: Optional[Tuple[int, int]] = None,
-        top_roi_lines: Optional[List[str]] = None,
-    ) -> Tuple[Optional[str], float, List[str], Dict[str, Any]]:
-        """
-        Top-section priority anchor:
-        - Prefer top 15% OCR blocks
-        - Use header anchors (name/customer/mr)
-        - Avoid blacklist words (wasooli/total/baqaya)
-        - Hybrid cross-check: Paddle structure + EasyOCR handwriting
-        """
-        warnings_local: List[str] = []
-        hitl_data: Dict[str, Any] = {"name_review_required": False, "name_candidates": []}
-        top_name: Optional[str] = None
-        top_conf = 0.0
-
-        blocks = ocr_blocks or []
-        if blocks and image_shape is not None:
-            img_h = float(image_shape[0])
-            img_w = float(image_shape[1]) if len(image_shape) > 1 else 0.0
-            hitl_data["source_height"] = img_h
-            hitl_data["source_width"] = img_w
-            hitl_data["roi_ratio"] = 0.20
-            top_cutoff = 0.15 * img_h
-            top_blocks = [b for b in blocks if self._bbox_center(b)[1] <= top_cutoff]
-            top_blocks = sorted(top_blocks, key=lambda b: self._bbox_center(b)[1])
-
-            # Build top candidate payload (for HITL/manual tap UI).
-            candidates: List[Dict[str, Any]] = []
-            for b in top_blocks:
-                txt = self._clean_name_candidate(b["text"])
-                if not self._valid_name_candidate(txt):
-                    continue
-                candidates.append(
-                    {
-                        "text": txt,
-                        "bbox": self._bbox_yxxy(b),
-                        "conf": round(float(b["confidence"]), 3),
-                    }
-                )
-            # Deduplicate candidate texts while preserving best confidence first.
-            uniq: Dict[str, Dict[str, Any]] = {}
-            for c in sorted(candidates, key=lambda x: x["conf"], reverse=True):
-                k = c["text"].lower()
-                if k not in uniq:
-                    uniq[k] = c
-            hitl_data["name_candidates"] = list(uniq.values())[:5]
-
-            # 1) Anchor by explicit Name/Customer-like headers in Paddle/fused blocks.
-            anchors = [
-                b
-                for b in top_blocks
-                if any(h in b["text"].lower() for h in self._name_headers())
-                and b["engine"] in ("paddle", "fused_paddle_easy", "merged")
-            ]
-            if anchors:
-                anchor = anchors[0]
-                ax, ay = self._bbox_center(anchor)
-                # If the header itself contains "Name: XYZ", extract inline value first.
-                anchor_text = normalize_ocr_text(anchor["text"])
-                inline = re.sub(r"(?i).{0,20}(name|customer|mr|جناب|نام)\s*[:\-]?\s*", "", anchor_text).strip()
-                if self._valid_name_candidate(inline) and not any(h in inline.lower() for h in self._name_headers()):
-                    top_name = inline
-                    top_conf = max(top_conf, 0.91)
-                nearby = []
-                for b in top_blocks:
-                    bx, by = self._bbox_center(b)
-                    if by >= ay and abs(by - ay) <= (0.12 * img_h) and abs(bx - ax) <= 260:
-                        nearby.append(b)
-                # Prefer non-header line adjacent/below anchor
-                for cand in nearby:
-                    txt = self._clean_name_candidate(cand["text"])
-                    if self._valid_name_candidate(txt) and not any(h in txt.lower() for h in self._name_headers()):
-                        top_name = txt
-                        top_conf = max(top_conf, 0.90)
-                        break
-
-            # 2) If anchor failed, score top candidates and pick the best non-transaction token.
-            if not top_name:
-                ranked: List[Tuple[float, str]] = []
-                for b in top_blocks:
-                    txt = self._clean_name_candidate(b["text"])
-                    if self._valid_name_candidate(txt):
-                        low = txt.lower()
-                        score = float(b["confidence"])
-                        # Prefer shorter human-like names and top-most region.
-                        score += 0.10 if len(txt.split()) <= 3 else 0.0
-                        score += 0.06 if len(txt) <= 24 else 0.0
-                        score += 0.08 if " " in txt else 0.0
-                        if any(k in low for k in ("total", "udhaar", "wasooli", "cash", "baqaya", "discount")):
-                            score -= 0.35
-                        score -= 0.10 if any(ch.isdigit() for ch in txt) else 0.0
-                        ranked.append((score, txt))
-                if ranked:
-                    ranked.sort(key=lambda x: x[0], reverse=True)
-                    top_name = ranked[0][1]
-                    top_conf = max(top_conf, min(0.90, max(0.70, ranked[0][0])))
-
-            # 3) Hybrid cross-check: compare paddle-layout candidate with easyocr nearby.
-            if top_name:
-                easy_neighbors = []
-                for b in top_blocks:
-                    if b["engine"] != "easyocr":
-                        continue
-                    et = self._clean_name_candidate(b["text"])
-                    if self._valid_name_candidate(et):
-                        easy_neighbors.append(et)
-                if easy_neighbors:
-                    # choose closest-length neighbor as proxy similarity target
-                    easy_best = min(easy_neighbors, key=lambda s: abs(len(s) - len(top_name)))
-                    a = re.sub(r"\s+", "", top_name.lower())
-                    b = re.sub(r"\s+", "", easy_best.lower())
-                    overlap = len(set(a) & set(b)) / max(1, len(set(a) | set(b)))
-                    if overlap < 0.28 and top_name.lower() != easy_best.lower():
-                        warnings_local.append("manual_review_name_mismatch_paddle_easy")
-                        hitl_data["name_review_required"] = True
-                        # Avoid wrong confident guess ("wasooli" style): drop confidence if mismatch is high
-                        top_conf = min(top_conf, 0.66)
-
-        if top_name:
-            top_name = self._normalize_person_name(top_name)
-        if top_name and self._valid_name_candidate(top_name):
-            return top_name, min(0.95, top_conf), warnings_local, hitl_data
-
-        # Dedicated ROI OCR fallback before generic line fallback.
-        if top_roi_lines:
-            for i, line in enumerate(top_roi_lines[:8]):
-                cleaned = self._normalize_person_name(line)
-                if not self._valid_name_candidate(cleaned):
-                    continue
-                conf = max(0.70, 0.88 - (i * 0.05))
-                if hitl_data.get("name_candidates"):
-                    hitl_data["name_review_required"] = len(hitl_data["name_candidates"]) > 1
-                return cleaned, min(0.90, conf), warnings_local, hitl_data
-
-        # Fallback to top text lines (legacy)
-        for i, line in enumerate(lines[:6]):
-            cleaned = self._normalize_person_name(line.strip())
-            if not self._valid_name_candidate(cleaned):
-                continue
-            item_match, _ = PakistaniRetailLexicon.normalize_item_name(cleaned)
-            if item_match != cleaned.lower() and item_match in PakistaniRetailLexicon.LEXICON:
-                continue
-            confidence = 0.88 - (i * 0.08)
-            if len(cleaned) >= 3:
-                if hitl_data.get("name_candidates"):
-                    # If we fell back despite ROI candidates, request user confirmation.
-                    hitl_data["name_review_required"] = True
-                return cleaned.strip(), min(0.92, confidence), warnings_local, hitl_data
-
-        if hitl_data.get("name_candidates"):
-            hitl_data["name_review_required"] = True
-        return None, 0.0, warnings_local, hitl_data
-    
-    def parse_items(self, lines: List[str]) -> Tuple[List[ExtractedItem], float]:
-        """v5.2: ULTRA-LENIENT item extraction - capture everything or label as uncategorized"""
-        items = []
-        confidences = []
-
-        expanded: List[str] = []
-        for raw in lines:
-            for piece in sliding_split_multi_price_line(raw):
-                expanded.append(piece)
-
-        # v5.2: LENIENT regex patterns for fallback
-        pattern1 = re.compile(r'([a-zA-Zء-ي\s]+?)\s+(\d+(?:\.\d+)?)\s*(?:kg|kgs?|g|gm|gram|liter|ml|dozen)?\s+(\d+(?:\.\d+)?)')
-        pattern2 = re.compile(r'([a-zA-Zء-ي\s]+?)\s*[\-\*–—]+\s*(\d+(?:\.\d+)?)\s*(?:kg|kgs?)?\s+(\d+(?:\.\d+)?)')
-        pattern3 = re.compile(r'([a-zA-Zء-ي\s]+?)\s+(\d+(?:\.\d+)?)')  # Ultra-lenient: item + any number
-
-        for line in expanded:
-            # Skip obvious non-item lines
-            line_lower = line.lower()
-            skip_keywords = ['date', 'total', 'udhaar', 'wasooli', 'cash', 'name', 'customer', 'time', 'receipt']
-            if any(kw in line_lower for kw in skip_keywords):
-                continue
-            
-            # v5.2: Enhanced parsing chain - geometry → aggressive → regex → ULTRA-LENIENT fallback
-            parsed = parse_geometry_line(line)
-            if not parsed:
-                parsed = parse_aggressive_patterns(line)  # v5.2: Try aggressive patterns
-            if not parsed:
-                # Fallback to legacy regex patterns
-                match = pattern2.search(line) or pattern1.search(line)
-                if not match:
-                    # v5.2: ULTRA-LENIENT fallback - if any text + number at end, treat as item
-                    match = pattern3.search(line)
-                    if not match:
-                        continue
-                    groups = match.groups()
-                    if len(groups) >= 2:
-                        item_name = groups[0]
-                        try:
-                            price = float(groups[1].replace(',', ''))
-                        except:
-                            continue
-                        quantity = 1.0
-                        confidence = 0.50  # Lower for ultra-lenient
-                    else:
-                        continue
-                else:
-                    groups = match.groups()
-                    if len(groups) >= 3:
-                        item_name, qty_str, price_str = groups[0], groups[1], groups[2]
-                        quantity = float(qty_str)
-                        confidence = 0.68
-                        try:
-                            price = float(price_str.replace(',', ''))
-                        except:
-                            continue
-                    else:
-                        continue
-            else:
-                # Use parsed result from geometry or aggressive parser
-                item_name = parsed['text']
-                quantity = parsed['quantity']
-                price = parsed['price']
-                confidence = parsed['confidence']
-
-            # Shopkeeper Rule: treat extracted `price` as LINE TOTAL by default.
-            # Only compute qty*unit_price when explicit unit marker exists (e.g. "@ 400").
-            price, _unit_price, _price_notes = self._compute_line_total_price(
-                line, float(quantity), float(price)
-            )
-                
-            # v5.2: LENIENT validation - allow wider range
-            if price < 0.5 or price > self.config.MAX_REASONABLE_PRICE:
-                continue
-            if quantity <= 0 or quantity > self.config.MAX_QUANTITY:
-                quantity = 1.0
-                
-            # Clean item name (v5.2: preserve more text)
-            item_name = normalize_ocr_text(item_name)
-            item_name = re.sub(r'[^\w\sء-ي]', ' ', item_name)
-            item_name = re.sub(r'\s+', ' ', item_name).strip().lower()
-            
-            if not item_name or len(item_name) < 2:
-                continue
-            
-            # v5.2: Apply semantic normalization - but BYPASS DISCARDS
-            normalized_name, semantic_conf = PakistaniRetailLexicon.normalize_item_name(item_name)
-            unit = PakistaniRetailLexicon.normalize_unit(line)
-            
-            in_lexicon = (normalized_name != item_name) or (item_name in PakistaniRetailLexicon.LEXICON)
-            
-            # v5.2: BYPASS LEXICON FAILURE - label as 'Uncategorized Item' instead of discarding
-            if not in_lexicon:
-                semantic_conf = 0.40
-                normalized_name = f"Uncategorized: {item_name}"
-            else:
-                # Apply confidence boosting only if in lexicon
-                if semantic_conf > 0.5:
-                    confidence = max(confidence, semantic_conf * 0.85)
-                if in_lexicon:
-                    confidence += SystemConfig.CONF_BOOST_LEXICON
-            
-            # Clamp confidence
-            confidence = min(0.98, max(0.35, confidence))
-                
-            items.append(ExtractedItem(
-                name=normalized_name,
-                quantity=quantity,
-                price=price,
-                unit=unit,
-                confidence=min(0.92, confidence),
-                original_text=line,
-                semantic_match=normalized_name if semantic_conf > 0.65 else None,
-                semantic_score=semantic_conf
-            ))
-            confidences.append(confidence)
-                
-        # Keep row-level items (do not collapse duplicates) so mobile UI can
-        # render exactly what OCR detected (e.g. 3 lines => 3 editable rows).
-        
-        avg_confidence = sum(confidences) / len(confidences) if confidences else 0.5
-        return items, avg_confidence
-    
-    def _merge_duplicates(self, items: List[ExtractedItem]) -> List[ExtractedItem]:
-        """Merge duplicate items (same name, same unit)"""
-        merged = {}
-        
-        for item in items:
-            key = (item.name, item.unit)
-            if key in merged:
-                existing = merged[key]
-                existing.quantity += item.quantity
-                existing.price = max(existing.price, item.price)  # Keep highest price
-                existing.confidence = max(existing.confidence, item.confidence)
-            else:
-                merged[key] = item
-                
-        return list(merged.values())
-
-    def reconcile_items_from_ocr_gap(
-        self,
-        items: List[ExtractedItem],
-        total: float,
-        ocr_blocks: List[OCRTextBlock],
-    ) -> Tuple[List[ExtractedItem], List[str]]:
-        """
-        If total_amount and sum(items) disagree, search raw OCR blocks for a missing
-        price (e.g. 50 when gap is 50) that was never attached to a line item.
-        """
-        notes: List[str] = []
-        if total <= 0 or not ocr_blocks:
-            return items, notes
-        sub = sum(it.quantity * it.price for it in items)
-        gap = round(float(total) - float(sub), 2)
-        tol = max(2.0, 0.02 * max(float(total), 1.0))
-        if abs(gap) <= self.config.TOTAL_VALIDATION_TOLERANCE or gap <= 0 or gap > 50000:
-            return items, notes
-
-        used = {round(it.price, 2) for it in items}
-        for blk in ocr_blocks:
-            raw = str(blk["text"])
-            for val in extract_numbers(raw):
-                if round(val, 2) in used:
-                    continue
-                if abs(float(val) - gap) <= tol:
-                    name_guess = re.sub(r"[\d\.\s,\-\*#:]+", " ", raw).strip()
-                    if len(name_guess) < 2:
-                        name_guess = f"recovered_line"
-                    nn, sc = PakistaniRetailLexicon.normalize_item_name(name_guess.lower())
-                    items.append(
-                        ExtractedItem(
-                            name=nn,
-                            quantity=1.0,
-                            price=float(val),
-                            unit="pc",
-                            confidence=0.52,
-                            original_text=raw,
-                            semantic_match=nn if sc > 0.62 else None,
-                            semantic_score=sc,
-                        )
-                    )
-                    notes.append(f"gap_recovered_amount_{val}_for_delta_{gap}")
-                    return self._merge_duplicates(items), notes
-        return items, notes
-
-    def parse_total(self, lines: List[str], items: List[ExtractedItem]) -> Tuple[float, float]:
-        """Extract total amount with confidence"""
-        candidates = []
-        
-        # Strategy 1: Look for 'Total' keyword in last lines
-        for line in reversed(lines[-8:]):
-            line_lower = line.lower()
-            if 'total' in line_lower or 'ٹوٹل' in line_lower or 'کل' in line_lower:
-                numbers = re.findall(r'(\d+(?:\.\d+)?)', line)
-                if numbers:
-                    total = float(numbers[-1])
-                    if total > 100:  # Reasonable total
-                        return total, 0.95
-                        
-        # Strategy 2: Look for numbers near 'udhaar/wasooli'
-        for line in reversed(lines[-5:]):
-            line_lower = line.lower()
-            if any(kw in line_lower for kw in ['udhaar', 'wasooli', 'ادھار', 'وصولی']):
-                numbers = re.findall(r'(\d+(?:\.\d+)?)', line)
-                if numbers:
-                    total = float(numbers[-1])
-                    if total > 100:
-                        return total, 0.90
-        
-        # Strategy 3: v5.1 Smart Total Anchoring - use subset-sum for validation
-        if items:
-            all_prices = [item.quantity * item.price for item in items]
-            total_from_items = sum(all_prices)
-            
-            # Try to find valid subset using subset-sum algorithm
-            if total_from_items > 0:
-                # First check if all items sum correctly
-                all_text = ' '.join(lines[-3:])  # Check last 3 lines for explicit total
-                explicit_numbers = re.findall(r'\b(\d{3,}(?:\.\d+)?)\b', all_text)
-                if explicit_numbers:
-                    explicit_total = float(max(explicit_numbers))
-                    if abs(explicit_total - total_from_items) < 5:  # Within tolerance
-                        return explicit_total, 0.88  # Trust explicit total
-                
-                return total_from_items, 0.80
-                
-        # Strategy 4: Last number in document
-        all_text = ' '.join(lines)
-        numbers = re.findall(r'\b(\d{3,}(?:\.\d+)?)\b', all_text)
-        if numbers:
-            total = max(float(n) for n in numbers)
-            return total, 0.65
-            
-        return 0.0, 0.0
-    
-    def validate_math(self, items: List[ExtractedItem], total: float) -> Tuple[bool, float, List[str]]:
-        """Validate mathematical consistency"""
-        corrections = []
-        # Shopkeeper rule: item.price is the LINE TOTAL (not unit price).
-        calculated_total = sum(float(item.price) for item in items)
-        
-        if abs(calculated_total - total) <= SystemConfig.TOTAL_VALIDATION_TOLERANCE:
-            return True, total, corrections
-            
-        # If discrepancy, **always** prefer calculated total from items (no hallucinations).
-        # Keep paper total only as a warning/diagnostic signal.
-        if calculated_total > 0:
-            if total > 0:
-                corrections.append(f"paper_total_mismatch_paper_{round(float(total),2)}_computed_{round(float(calculated_total),2)}")
-            corrections.append(f"using_calculated_total_{round(float(calculated_total),2)}")
-            return True, float(calculated_total), corrections
-                
-        return False, total, corrections
-
-
-# ============================================================================
-# AGENTIC MATH & LOGIC VALIDATOR (Pass 4)
-# ============================================================================
-
-class AgenticMathValidator:
-    """
-    Pass 4: Self-correcting mathematical validation loop.
-    - Validates item sum vs total
-    - Triggers targeted re-examination on mismatches
-    - Suggests corrections
-    """
-    
-    def __init__(self, config: SystemConfig):
-        self.config = config
-        
-    def validate(self, items: List[ExtractedItem], total: float) -> Tuple[bool, float, List[str], float]:
-        """
-        Validate and correct mathematical inconsistencies.
-        Returns: (is_valid, corrected_total, corrections, overall_confidence)
-        """
-        corrections = []
-        
-        # Calculate sum
-        # Shopkeeper rule: item.price is the LINE TOTAL (not unit price).
-        item_sum = sum(float(item.price) for item in items)
-        
-        # Perfect match
-        if abs(item_sum - total) <= self.config.TOTAL_VALIDATION_TOLERANCE:
-            return True, total, corrections, 0.95
-            
-        # Check if total is zero or missing
-        if total == 0 and item_sum > 0:
-            corrections.append("total_missing_using_items_sum")
-            return True, item_sum, corrections, 0.85
-            
-        # Check if items sum is zero
-        if item_sum == 0 and total > 0:
-            corrections.append("items_missing_using_provided_total")
-            return True, total, corrections, 0.80
-            
-        # Calculate relative error
-        relative_error = abs(item_sum - total) / max(total, item_sum, 1)
-        
-        # Small relative error tolerance
-        if relative_error <= 0.05:  # Within 5%
-            corrections.append(f"small_discrepancy_{relative_error:.2%}_using_total")
-            return True, total, corrections, 0.90
-            
-        # Medium error - adjust total to items
-        if relative_error <= 0.15:
-            corrections.append(f"total_adjusted_from_{total}_to_{item_sum}")
-            return True, item_sum, corrections, 0.75
-            
-        # Large error - flag for human review
-        corrections.append(f"large_discrepancy_{relative_error:.2%}_needs_review")
-        return False, total, corrections, 0.50
-    
-    def calculate_confidence(self, items: List[ExtractedItem], math_validated: bool, total: float) -> Dict[str, float]:
-        """Calculate overall confidence scores"""
-        if not items:
-            return {"items": 0.0, "total": 0.0, "overall": 0.0}
-            
-        # Item confidence
-        item_conf = sum(item.confidence for item in items) / len(items)
-        
-        # Total confidence
-        total_conf = 0.95 if math_validated else 0.65
-        
-        # Transaction type confidence (default)
-        type_conf = 0.85 if total > 0 else 0.40
-        
-        # Overall confidence (weighted)
-        overall = (item_conf * 0.4 + total_conf * 0.4 + type_conf * 0.2)
-        
-        return {
-            "items": round(item_conf, 3),
-            "total": round(total_conf, 3),
-            "type": round(type_conf, 3),
-            "overall": round(overall, 3)
-        }
-
-
-# ============================================================================
-# ADVANCED CALCULATION ENGINE (Pass 3.5)
-# ============================================================================
-
-class AdvancedCalculationEngine:
-    """
-    Extra calculation intelligence for messy parchis:
-    - Detect subtotal/discount/paid/balance style numbers
-    - Score multiple total candidates
-    - Repair one likely wrong item price when near-match exists
-    - Reconcile final total using item sum + financial hints
-    """
 
-    def __init__(self, config: SystemConfig):
-        self.config = config
-
-    @staticmethod
-    def _line_numbers(line: str) -> List[float]:
-        vals: List[float] = []
-        for m in re.findall(r"\d+(?:\.\d+)?", line or ""):
-            try:
-                v = float(m)
-                if 0 <= v <= 999999:
-                    vals.append(v)
-            except Exception:
-                continue
-        return vals
-
-    @staticmethod
-    def _line_tag(line: str) -> str:
-        s = (line or "").lower()
-        if any(k in s for k in ("subtotal", "sub total", "ذیلی", "جمع")):
-            return "subtotal"
-        if any(k in s for k in ("discount", "disc", "رعایت", "ڈسکاؤنٹ")):
-            return "discount"
-        if any(k in s for k in ("paid", "ادا", "cash", "وصولی")):
-            return "paid"
-        if any(k in s for k in ("balance", "baqaya", "بقای", "بقایا", "udhaar", "ادھار")):
-            return "balance"
-        if any(k in s for k in ("total", "grand total", "کل", "ٹوٹل")):
-            return "total"
-        return "other"
-
-    def _extract_financial_hints(self, lines: List[str]) -> Dict[str, float]:
-        hints: Dict[str, float] = {}
-        for ln in lines:
-            tag = self._line_tag(ln)
-            nums = self._line_numbers(ln)
-            if not nums:
-                continue
-            v = float(nums[-1])
-            if v <= 0:
-                continue
-            # keep stronger/latest candidate by preferring larger values for totals,
-            # and larger discounts/paid when repeated.
-            prev = hints.get(tag)
-            if prev is None or v >= prev:
-                hints[tag] = v
-        return hints
-
-    def _candidate_totals(self, lines: List[str], items: List[ExtractedItem], initial_total: float) -> List[Tuple[float, float, str]]:
-        cands: List[Tuple[float, float, str]] = []
-        if initial_total > 0:
-            cands.append((initial_total, 0.78, "parser_total"))
-
-        item_sum = float(sum(i.quantity * i.price for i in items))
-        if item_sum > 0:
-            cands.append((item_sum, 0.74, "items_sum"))
-
-        hints = self._extract_financial_hints(lines)
-        if "total" in hints:
-            cands.append((hints["total"], 0.90, "line_total"))
-        if "balance" in hints:
-            cands.append((hints["balance"], 0.84, "line_balance"))
-        if "subtotal" in hints:
-            subtotal = hints["subtotal"]
-            discount = hints.get("discount", 0.0)
-            paid = hints.get("paid", 0.0)
-            # subtotal - discount - paid (if present) as strong ledger formula
-            calc = subtotal - discount - paid
-            if calc > 0:
-                cands.append((calc, 0.82, "subtotal_discount_paid_formula"))
-
-        # dedupe by rounded value while keeping best confidence source
-        best: Dict[float, Tuple[float, float, str]] = {}
-        for v, conf, src in cands:
-            k = round(v, 2)
-            if k not in best or conf > best[k][1]:
-                best[k] = (v, conf, src)
-        return sorted(best.values(), key=lambda x: x[1], reverse=True)
-
-    def _try_single_item_repair(self, items: List[ExtractedItem], target_total: float) -> Tuple[List[ExtractedItem], List[str]]:
-        notes: List[str] = []
-        if not items or target_total <= 0:
-            return items, notes
-        # Shopkeeper rule: item.price is line total.
-        current = float(sum(float(i.price) for i in items))
-        delta = round(target_total - current, 2)
-        if abs(delta) < 2.0:
-            return items, notes
-
-        # Try to repair one low-confidence item by adjusting its price by delta/qty.
-        ranked_idx = sorted(
-            range(len(items)),
-            key=lambda idx: (items[idx].confidence, items[idx].quantity),
-        )
-        for idx in ranked_idx[: min(4, len(ranked_idx))]:
-            it = items[idx]
-            new_price = round(float(it.price) + float(delta), 2)
-            if not (self.config.MIN_ITEM_PRICE <= new_price <= self.config.MAX_REASONABLE_PRICE):
-                continue
-            old = it.price
-            it.price = new_price
-            it.confidence = max(0.35, min(0.90, it.confidence - 0.05))
-            notes.append(f"single_item_price_repair_{it.name}_{old}_to_{new_price}")
-            break
-        return items, notes
-
-    def reconcile(
-        self,
-        lines: List[str],
-        items: List[ExtractedItem],
-        parsed_total: float,
-    ) -> Tuple[List[ExtractedItem], float, List[str], float]:
-        """
-        Returns:
-          (possibly adjusted items, reconciled_total, notes, confidence)
-        """
-        notes: List[str] = []
-        candidates = self._candidate_totals(lines, items, parsed_total)
-        if not candidates:
-            return items, parsed_total, notes, 0.45
-
-        # Pick best candidate by confidence, but prefer consistency with items where close.
-        item_sum = float(sum(float(i.price) for i in items))
-        best_v, best_c, best_src = candidates[0]
-        if item_sum > 0:
-            for v, c, src in candidates:
-                rel = abs(v - item_sum) / max(v, item_sum, 1.0)
-                if rel <= 0.08 and c >= 0.72:
-                    best_v, best_c, best_src = v, max(c, 0.88), src
-                    notes.append(f"candidate_selected_by_item_consistency_{src}_{v}")
-                    break
+def line_text(line: list[dict]) -> str:  # join the "text" value of every dict in `line` with single spaces
+    return " ".join(d["text"] for d in line)
 
-        notes.append(f"calc_engine_selected_{best_src}_{round(best_v,2)}")
-        pre_items = float(sum(float(i.price) for i in items))
-        rel_gap = abs(pre_items - best_v) / max(pre_items, best_v, 1.0) if (pre_items > 0 and best_v > 0) else 0.0
-        if rel_gap > 0.12:
-            items, fix_notes = self._try_single_item_repair(items, best_v)
-            notes.extend(fix_notes)
+# ---------------------------------------------------------------------------
+# Generic item parser
+# ---------------------------------------------------------------------------
 
-        final_items_sum = float(sum(float(i.price) for i in items))
-        final_gap = abs(final_items_sum - best_v) / max(final_items_sum, best_v, 1.0) if (final_items_sum > 0 and best_v > 0) else 0.0
-        final_conf = max(0.50, min(0.95, best_c - min(0.20, final_gap * 0.5)))
+def _is_text_token(tok: str) -> bool:  # True if tok has any ASCII letter or a U+0600–U+06FF (Arabic block) character
+    return bool(re.search(r"[a-zA-Z\u0600-\u06FF]", tok))
 
-        # If still too far and item sum looks reliable, pivot to item sum.
-        if final_items_sum > 0 and final_gap > 0.22:
-            notes.append(f"calc_engine_pivot_to_item_sum_from_{best_v}_to_{final_items_sum}")
-            return items, final_items_sum, notes, max(0.68, final_conf - 0.08)
 
-        return items, best_v, notes, final_conf
+def _is_num_token(tok: str) -> bool:  # True if tok is non-empty and made only of digits, ",", "." and "/"
+    return bool(re.fullmatch(r"[\d,./]+", tok))  # NOTE: shape check only – "1/2" or ".." also pass
 
 
-# ============================================================================
-# HUMAN-IN-THE-LOOP (HITL) FEEDBACK SYSTEM
-# ============================================================================
+def parse_item_from_tokens(tokens: list[str]) -> dict | None:
+    """
+    Pattern matching (order matters):
+      [text] [num] [num]           → name qty price  OR name price unit
+      [num] [text] [num]           → qty  name price
+      [text]-[num] [num]           → name qty price
+      [text] [num]                 → name price (qty=1)
+    Returns None if no valid item detected.
+    """
+    tokens = merge_spaced_digits(tokens)
+    text_toks  = [t for t in tokens if _is_text_token(t)]
+    num_toks   = [t for t in tokens if _is_num_token(t)]
 
-class UserFeedback(BaseModel):
-    """User correction feedback for model improvement"""
-    request_id: str
-    corrected_items: List[ExtractedItem]
-    corrected_total: Optional[float] = None
-    corrected_customer: Optional[str] = None
-    transaction_type: Optional[str] = None
-    feedback_notes: str = ""
-    timestamp: Optional[str] = None
+    # float() rejects "1/2", "12/05/24" or ".." even though _is_num_token accepts them – drop such tokens instead of raising.
+    nums = [float(s) for s in (t.replace(",", "") for t in num_toks) if re.fullmatch(r"\d+\.?\d*|\.\d+", s)]
+    if not text_toks or not nums:  # skip lines with no text or no usable number
+        return None
 
+    name_raw = " ".join(text_toks)
+    name = lexicon_correct(name_raw.strip())
 
-class FeedbackLearner:
-    """
-    HITL learning system: aggregates user corrections to identify patterns.
-    Uses in-memory learning with zero persistence (ZDR compliance).
-    """
-    
-    def __init__(self, max_feedback_items: int = 500):
-        self.feedback_history = defaultdict(list)  # {item_name: [corrections]}
-        self.error_patterns = defaultdict(int)  # {pattern: frequency}
-        self.max_items = max_feedback_items
-        self.total_corrections = 0
-        self.last_cleared = datetime.now()
-        
-    def add_feedback(self, feedback: UserFeedback) -> Dict[str, Any]:
-        """Process user feedback and learn patterns"""
-        patterns = []
-        
-        for item in feedback.corrected_items:
-            self.feedback_history[item.name].append({
-                "price": item.price,
-                "quantity": item.quantity,
-                "confidence_original": item.confidence,
-                "timestamp": datetime.now().isoformat()
-            })
-            
-            # Detect error pattern
-            if item.semantic_score < 0.7 and item.semantic_match:
-                pattern = f"confuse_{item.semantic_match}_with_{item.name}"
-                self.error_patterns[pattern] += 1
-                patterns.append(pattern)
-        
-        self.total_corrections += len(feedback.corrected_items)
-        
-        # Age out old feedback (ZDR: only keep recent corrections)
-        if len(self.feedback_history) > self.max_items:
-            oldest_item = min(
-                self.feedback_history.items(),
-                key=lambda x: x[1][-1]["timestamp"] if x[1] else "0"
-            )
-            del self.feedback_history[oldest_item[0]]
-        
-        logger.info(f"Feedback recorded: {feedback.request_id} | Patterns: {patterns}")
-        
-        return {
-            "status": "feedback_recorded",
-            "patterns_detected": patterns,
-            "total_corrections_aggregated": self.total_corrections
-        }
-    
-    def get_high_confidence_corrections(self) -> Dict[str, float]:
-        """Return learned price/qty mappings with high frequency"""
-        corrections = {}
-        for item_name, corrections_list in self.feedback_history.items():
-            if len(corrections_list) >= 3:  # Need 3+ corrections to be confident
-                avg_price = np.mean([c["price"] for c in corrections_list])
-                avg_qty = np.mean([c["quantity"] for c in corrections_list])
-                corrections[item_name] = {"price": avg_price, "qty": avg_qty}
-        return corrections
-    
-    def get_error_insights(self) -> Dict[str, Any]:
-        """Provide insights into common OCR mistakes"""
-        if not self.error_patterns:
-            return {"insights": "No error patterns detected yet"}
-        
-        top_errors = sorted(
-            self.error_patterns.items(),
-            key=lambda x: x[1],
-            reverse=True
-        )[:5]
-        
-        return {
-            "top_error_patterns": [{"pattern": p, "frequency": f} for p, f in top_errors],
-            "total_feedbacks": self.total_corrections,
-            "unique_items_learned": len(self.feedback_history)
-        }
-    
-    def clear_old_data(self, hours: int = 24):
-        """ZDR: Clear old feedback data periodically"""
-        now = datetime.now()
-        cutoff_time = (now - threading.Event()).replace(hour=now.hour - hours)
-        
-        cleared = 0
-        for item_name in list(self.feedback_history.keys()):
-            self.feedback_history[item_name] = [
-                c for c in self.feedback_history[item_name]
-                if c["timestamp"] > cutoff_time.isoformat()
-            ]
-            if not self.feedback_history[item_name]:
-                del self.feedback_history[item_name]
-                cleared += 1
-        
-        self.last_cleared = now
-        logger.info(f"ZDR cleanup: Cleared {cleared} old item records")
-        return {"status": "cleanup_complete", "items_cleared": cleared}
-
-
-# ============================================================================
-# ZERO DATA RETENTION (ZDR) COMPLIANCE MANAGER
-# ============================================================================
-
-class ZDRCompliance:
-    """
-    Ensure zero persistent storage of user data.
-    All processing is in-memory and ephemeral.
-    """
-    
-    def __init__(self):
-        self.request_cache = {}  # {request_id: {data}, expires_at}
-        self.cache_ttl = 3600  # 1 hour
-        self.last_cleanup = time.time()
-        
-    def store_request_data(self, request_id: str, result: ProcessingResult) -> None:
-        """Store only during active session"""
-        self.request_cache[request_id] = {
-            "result": result,
-            "expires_at": time.time() + self.cache_ttl
-        }
-        
-        # Auto cleanup every 100 requests
-        if len(self.request_cache) % 100 == 0:
-            self._cleanup_expired()
-    
-    def retrieve_request_data(self, request_id: str) -> Optional[ProcessingResult]:
-        """Retrieve only if not expired"""
-        if request_id not in self.request_cache:
-            return None
-        
-        entry = self.request_cache[request_id]
-        if time.time() > entry["expires_at"]:
-            del self.request_cache[request_id]
+    if len(nums) >= 2:
+        # The first number is read as the quantity and the last as the price,
+        # whether the line starts with a number ("2 sugar 300") or with text
+        # ("sugar 2 300"); any numbers in between are ignored.
+        qty, price = nums[0], nums[-1]
+        # sanity guards (from Config)
+        if qty > Config.MAX_ITEM_QTY:
+            qty, price = 1.0, nums[-1]
+        if price < Config.MIN_ITEM_PRICE or price > Config.MAX_ITEM_PRICE:
+            return None           # noise – don't fabricate an item
+        return {"name": name, "quantity": qty, "price": price}
+    else:
+        price = nums[0]
+        if price < Config.MIN_ITEM_PRICE or price > Config.MAX_ITEM_PRICE:
             return None
-        
-        return entry["result"]
-    
-    def _cleanup_expired(self) -> int:
-        """Remove expired entries"""
-        now = time.time()
-        expired_ids = [
-            rid for rid, entry in self.request_cache.items()
-            if now > entry["expires_at"]
-        ]
-        for rid in expired_ids:
-            del self.request_cache[rid]
-        
-        logger.info(f"ZDR cleanup: Removed {len(expired_ids)} expired requests")
-        return len(expired_ids)
-    
-    def get_compliance_status(self) -> Dict[str, Any]:
-        """Return compliance status"""
-        self._cleanup_expired()
-        return {
-            "zdr_enabled": True,
-            "cache_ttl_seconds": self.cache_ttl,
-            "active_requests": len(self.request_cache),
-            "last_cleanup": datetime.fromtimestamp(self.last_cleanup).isoformat(),
-            "privacy_level": "HIPAA-compliant (no persistent storage)"
-        }
-
-
-# ============================================================================
-# ADVANCED AGENTIC SELF-CORRECTION LOOP (Pass 4 Enhanced)
-# ============================================================================
-
-class AgenticSelfCorrector:
-    """
-    Intelligent multi-agent self-correction based on logical constraints:
-    - Price must be reasonable for item
-    - Total must equal sum of (price × quantity)
-    - Items must match known retail lexicon
-    - Quantities must be valid units
-    """
-    
-    def __init__(self, feedback_learner: FeedbackLearner):
-        self.learner = feedback_learner
-        self.correction_log = []
-    
-    def apply_learned_corrections(self, items: List[ExtractedItem]) -> Tuple[List[ExtractedItem], List[str]]:
-        """Apply corrections based on HITL feedback"""
-        corrections = []
-        high_conf_corrections = self.learner.get_high_confidence_corrections()
-        
-        for item in items:
-            if item.name in high_conf_corrections:
-                learned = high_conf_corrections[item.name]
-                
-                # If current extraction is low confidence, apply learned correction
-                if item.confidence < 0.75:
-                    old_price = item.price
-                    old_qty = item.quantity
-                    
-                    item.price = learned["price"]
-                    item.quantity = learned["qty"]
-                    item.confidence = min(0.95, item.confidence + 0.15)
-                    
-                    corrections.append(
-                        f"Applied learned correction for {item.name}: "
-                        f"${old_price} → ${item.price}, qty {old_qty} → {item.quantity}"
-                    )
-        
-        return items, corrections
-    
-    def validate_item_prices(self, items: List[ExtractedItem]) -> Tuple[List[ExtractedItem], List[str]]:
-        """
-        Validate prices against Pakistani retail knowledge:
-        - Vegetables/Fruits: 50-500 PKR/kg
-        - Dairy: 100-1000 PKR
-        - Grains: 40-300 PKR/kg
-        """
-        corrections = []
-        
-        price_ranges = {
-            "vegetables": (50, 500),
-            "fruits": (50, 500),
-            "dairy": (100, 1000),
-            "meat": (300, 2000),
-            "grains": (40, 300),
-            "spices": (200, 3000),
-            "general": (10, 50000)
-        }
-        
-        for item in items:
-            category = "general"
-            item_lower = item.name.lower()
-            
-            for cat in price_ranges:
-                if cat in item_lower or item_lower in cat:
-                    category = cat
-                    break
-            
-            min_price, max_price = price_ranges[category]
-            
-            if item.price < min_price or item.price > max_price:
-                old_price = item.price
-                item.price = max(min_price, min(item.price, max_price))
-                item.confidence *= 0.9  # Reduce confidence for adjusted price
-                
-                corrections.append(
-                    f"Price validation: {item.name} ${old_price} → ${item.price} "
-                    f"(valid range: ${min_price}-${max_price})"
-                )
-        
-        return items, corrections
-    
-    def apply_mathematical_corrections(self, items: List[ExtractedItem], reported_total: float) -> Tuple[float, List[str]]:
-        """
-        Self-correct using mathematical constraints:
-        1. Calculate sum of (qty × price)
-        2. If sum != reported_total, identify which item(s) might be wrong
-        3. Apply Bayesian adjustment based on confidence
-        """
-        corrections = []
-        calculated_sum = sum(item.quantity * item.price for item in items)
-        
-        if abs(calculated_sum - reported_total) > 2.0:
-            # Find lowest-confidence items and adjust
-            items_by_conf = sorted(items, key=lambda x: x.confidence)
-            
-            diff = reported_total - calculated_sum
-            
-            # Try to fix using lowest confidence items
-            for item in items_by_conf[:min(2, len(items))]:
-                if item.confidence < 0.80:
-                    adjustment = diff / (item.quantity or 1)
-                    item.price += adjustment
-                    calculated_sum = sum(i.quantity * i.price for i in items)
-                    
-                    corrections.append(
-                        f"Mathematical correction: {item.name} adjusted "
-                        f"(diff was {diff:.2f} PKR, confidence was {item.confidence:.2f})"
-                    )
-                    
-                    if abs(calculated_sum - reported_total) < 2.0:
-                        break
-        
-        return reported_total, corrections
-
-
-# ============================================================================
-# IMAGE-HASH LRU CACHE (v6.0) — ZDR-compliant, TTL-1h, max 100 entries
-# ============================================================================
-
-class _ImageHashCache:
-    """
-    Thread-safe in-memory cache keyed by SHA-256 of the raw image bytes.
-    Entries expire after CACHE_TTL seconds (default 3600 = 1 hour).
-    Maximum MAX_CACHE_SIZE entries; oldest entry evicted on overflow.
-    No images or PII are stored — only the structured ProcessingResult.
-    """
+        return {"name": name, "quantity": 1.0, "price": price}
 
-    def __init__(self, ttl: int = 3600, max_size: int = 100):
-        self._store: dict = {}        # {hash_hex: {"result": ..., "expires_at": float}}
-        self._order: list = []        # insertion order for LRU eviction
-        self._lock = threading.Lock()
-        self.ttl = ttl
-        self.max_size = max_size
-
-    def _key(self, image_bytes: bytes) -> str:
-        return hashlib.sha256(image_bytes).hexdigest()
-
-    def get(self, image_bytes: bytes):
-        """Return cached ProcessingResult or None (cache miss / expired)."""
-        key = self._key(image_bytes)
-        with self._lock:
-            entry = self._store.get(key)
-            if entry is None:
-                return None
-            if time.time() > entry["expires_at"]:
-                # Expired — evict
-                self._store.pop(key, None)
-                if key in self._order:
-                    self._order.remove(key)
-                return None
-            return entry["result"]
-
-    def set(self, image_bytes: bytes, result) -> None:
-        """Store result for image_bytes. Evict LRU entry if at capacity."""
-        key = self._key(image_bytes)
-        with self._lock:
-            # Evict expired entries first
-            now = time.time()
-            expired = [k for k, v in self._store.items() if now > v["expires_at"]]
-            for k in expired:
-                self._store.pop(k, None)
-                if k in self._order:
-                    self._order.remove(k)
-            # LRU eviction if still full
-            while len(self._store) >= self.max_size and self._order:
-                oldest = self._order.pop(0)
-                self._store.pop(oldest, None)
-            self._store[key] = {"result": result, "expires_at": now + self.ttl}
-            if key in self._order:
-                self._order.remove(key)
-            self._order.append(key)
-
-    def stats(self) -> dict:
-        with self._lock:
-            return {"entries": len(self._store), "max_size": self.max_size, "ttl_seconds": self.ttl}
-
-
-# Global cache instance
-_IMAGE_CACHE = _ImageHashCache(ttl=SystemConfig.CACHE_TTL, max_size=SystemConfig.MAX_CACHE_SIZE)
-
-# Concurrency guard: max 2 simultaneous OCR/VLM requests to prevent OOM
-_REQUEST_SEMAPHORE = asyncio.Semaphore(2)
-
-
-# ============================================================================
-# VLM ENGINE (v6.0) — Qwen2-VL-2B-Instruct, CPU fp32, NO bitsandbytes
-# ============================================================================
-
-class QwenVLMEngine:
-    """
-    Wraps Qwen2-VL-2B-Instruct for full-page parchi extraction.
-
-    Design decisions for CPU / 16 GB RAM:
-    • dtype=torch.float32 — bfloat16 is unreliable on some CPU builds.
-    • device_map="cpu"   — explicit, no auto-GPU fallback.
-    • Lazy loading       — model is NOT loaded at import time; first call
-      triggers load so startup is fast and HF health probe passes.
-    • Memory guard       — if RSS > VLM_MEMORY_LIMIT_MB after load, the
-      engine self-disables and the fallback OCR ensemble takes over.
-    • Thread lock        — only one inference thread at a time.
-    """
 
-    #: Prompt sent to Qwen2-VL for structured parchi extraction.
-    _SYSTEM_PROMPT = (
-        "You are an expert OCR assistant for Pakistani handwritten receipts (parchi). "
-        "Extract ALL text exactly as written, preserving Urdu and English. "
-        "For each line, output: item_name | quantity | unit | price. "
-        "At the end output TOTAL: <amount> on its own line. "
-        "If a field is missing use 'N/A'. Do not invent data."
-    )
-
-    def __init__(self, config: SystemConfig):
-        self.config = config
-        self._model = None
-        self._processor = None
-        self._loaded = False
-        self._disabled = False          # set True if load/memory guard fails
-        self._lock = threading.Lock()
-
-    # ------------------------------------------------------------------
-    # Lazy loader
-    # ------------------------------------------------------------------
-
-    def _load(self) -> bool:
-        """
-        Load model + processor synchronously (called once from a thread).
-        Returns True on success, False on failure.
-        """
-        if self._loaded or self._disabled:
-            return self._loaded and not self._disabled
-
-        if not (TRANSFORMERS_AVAILABLE and SystemConfig.ENABLE_VLM):
-            logger.info("VLM disabled (ENABLE_VLM=0 or transformers unavailable).")
-            self._disabled = True
-            return False
-
-        if not TORCH_AVAILABLE or torch is None:
-            logger.warning("VLM skipped: torch not available.")
-            self._disabled = True
-            return False
-
-        model_id = self.config.VLM_MODEL_ID
-        logger.info("Loading VLM %s on CPU (fp32) — this may take 60-120 s...", model_id)
-        rss_before = _get_rss_mb()
-        try:
-            self._processor = AutoProcessor.from_pretrained(
-                model_id,
-                trust_remote_code=True,
-                # Cache weights to HF_HOME (/.cache by default in Docker)
-                cache_dir=os.getenv("TRANSFORMERS_CACHE", "/.cache"),
-            )
-            self._model = Qwen2VLForConditionalGeneration.from_pretrained(
-                model_id,
-                torch_dtype=torch.float32,   # fp32 — safest on CPU
-                device_map="cpu",
-                trust_remote_code=True,
-                cache_dir=os.getenv("TRANSFORMERS_CACHE", "/.cache"),
-            )
-            self._model.eval()  # inference-only mode
-
-            rss_after = _get_rss_mb()
-            delta = rss_after - rss_before
-            logger.info(
-                "VLM loaded | RSS before=%.0fMB after=%.0fMB delta=%.0fMB",
-                rss_before, rss_after, delta,
-            )
-
-            # Memory safety guard
-            if rss_after > self.config.VLM_MEMORY_LIMIT_MB:
-                logger.error(
-                    "VLM RSS %.0f MB exceeds limit %.0f MB — disabling VLM.",
-                    rss_after, self.config.VLM_MEMORY_LIMIT_MB,
-                )
-                self._model = None
-                self._processor = None
-                _free_memory()
-                self._disabled = True
-                return False
+def parse_item_from_line(line: list[dict]) -> dict | None:
+    """Parse one grouped OCR line into an item dict; None if no item is found."""
+    normalised = normalise_text(line_text(line))
+    item = parse_item_from_tokens(normalised.split())
+    if item is None:
+        return None
+
+    # Line confidence is the mean "conf" of its detections, rounded to 3 places;
+    # a mean below 0.50 marks the item as low-confidence.
+    mean_conf = float(np.mean([det["conf"] for det in line]))
+    item.update(confidence=round(mean_conf, 3), low_confidence=mean_conf < 0.50)
+    item["unit"] = _detect_unit(normalised)
+    return item
 
-            self._loaded = True
-            return True
 
-        except Exception as exc:
-            logger.error("VLM load failed: %s", exc, exc_info=True)
-            self._model = None
-            self._processor = None
-            _free_memory()
-            self._disabled = True
-            return False
-
-    # ------------------------------------------------------------------
-    # Inference
-    # ------------------------------------------------------------------
-
-    def extract(self, pil_image: "Image.Image") -> Optional[str]:
-        """
-        Run Qwen2-VL on a PIL image and return raw text output.
-        Returns None if VLM is disabled or inference fails.
-        Runs synchronously (CPU-bound); caller wraps in asyncio.to_thread.
-        """
-        with self._lock:
-            if not self._load():
-                return None
-
-            try:
-                # Build the multi-modal message payload
-                messages = [
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "image", "image": pil_image},
-                            {"type": "text",  "text": self._SYSTEM_PROMPT},
-                        ],
-                    }
-                ]
-
-                # Prepare inputs (qwen_vl_utils path or fallback)
-                if QWEN_VL_UTILS_AVAILABLE and process_vision_info is not None:
-                    image_inputs, video_inputs = process_vision_info(messages)
-                    text = self._processor.apply_chat_template(
-                        messages, tokenize=False, add_generation_prompt=True
-                    )
-                    inputs = self._processor(
-                        text=[text],
-                        images=image_inputs,
-                        videos=video_inputs,
-                        padding=True,
-                        return_tensors="pt",
-                    )
-                else:
-                    # Minimal fallback without qwen_vl_utils
-                    text = self._processor.apply_chat_template(
-                        messages, tokenize=False, add_generation_prompt=True
-                    )
-                    inputs = self._processor(
-                        text=[text],
-                        images=[pil_image],
-                        padding=True,
-                        return_tensors="pt",
-                    )
-
-                inputs = inputs.to("cpu")
-
-                with torch.no_grad():
-                    output_ids = self._model.generate(
-                        **inputs,
-                        max_new_tokens=self.config.VLM_MAX_NEW_TOKENS,
-                        do_sample=False,         # deterministic on CPU
-                        temperature=None,
-                        top_p=None,
-                    )
-
-                # Decode only the newly generated tokens
-                generated = output_ids[:, inputs.input_ids.shape[1]:]
-                result = self._processor.batch_decode(
-                    generated, skip_special_tokens=True, clean_up_tokenization_spaces=True
-                )
-                return result[0].strip() if result else None
-
-            except Exception as exc:
-                logger.error("VLM inference error: %s", exc)
-                return None
-
-    # ------------------------------------------------------------------
-    # VLM text → structured lines
-    # ------------------------------------------------------------------
-
-    @staticmethod
-    def parse_vlm_output(raw: str) -> Tuple[List[str], Optional[float]]:
-        """
-        Convert VLM pipe-delimited output into plain text lines + optional total.
-        Lines look like:  "atta | 2 | kg | 240"  or freeform if the model deviates.
-        Returns (lines, total_from_vlm_or_None).
-        """
-        if not raw:
-            return [], None
-
-        lines: List[str] = []
-        vlm_total: Optional[float] = None
-
-        for raw_line in raw.splitlines():
-            stripped = raw_line.strip()
-            if not stripped:
-                continue
+def _detect_unit(text: str) -> str:
+    """Return the unit named in *text*: kg, g, liter, ml or dozen; otherwise "pc"."""
+    # Short codes must stand alone ("2kg", "500 g"); "milliliter" is ml, not liter.
+    for unit, pattern in (
+        ("kg",    r"kilo|کلو|(?<![a-z])kgs?(?![a-z])"),
+        ("g",     r"gram|(?<![a-z])(?:gms?|g)(?![a-z])"),
+        ("liter", r"(?<!milli)lit(?:er|re)|ltr|لیٹر"),
+        ("ml",    r"millilit(?:er|re)|(?<![a-z])ml(?![a-z])"),
+        ("dozen", r"dozen|درجن|(?<![a-z])dz(?![a-z])"),
+    ):
+        if re.search(pattern, text, re.IGNORECASE):
+            return unit
+    return "pc"  # explicit pc / piece / عدد markers resolve to the default as well
+
+# ---------------------------------------------------------------------------
+# Customer name extraction (top 15 % of image)
+# ---------------------------------------------------------------------------
+
+# Case-insensitive keywords; a line containing any of them is not a customer name
+_NAME_SKIP = re.compile(
+    r"(date|time|total|udhaar|wasooli|cash|receipt|shop|دکان|تاریخ)", re.I
+)
 
-            # Detect explicit TOTAL line produced by the prompt
-            if re.match(r'(?i)^total\s*[:=]?\s*', stripped):
-                nums = re.findall(r'[\d,\.]+', stripped)
-                for n in reversed(nums):
-                    try:
-                        v = float(n.replace(',', ''))
-                        if v > 0:
-                            vlm_total = v
-                            break
-                    except ValueError:
-                        pass
-                continue  # Don't add total line to item lines
-
-            # Pipe-delimited: "name | qty | unit | price"
-            parts = [p.strip() for p in stripped.split('|')]
-            if len(parts) >= 2 and parts[0] and parts[0].lower() != 'n/a':
-                # Reconstruct as a plain OCR-style line for the existing parser
-                reconstructed = ' '.join(p for p in parts if p and p.lower() != 'n/a')
-                if reconstructed:
-                    lines.append(normalize_ocr_text(reconstructed))
-            else:
-                # Free-form line — pass through as-is
-                lines.append(normalize_ocr_text(stripped))
-
-        return lines, vlm_total
-
-
-# ============================================================================
-# MAIN ORCHESTRATOR (4-Pass Agentic Loop)
-# ============================================================================
-
-class ParchiOrchestrator:
+def extract_customer_name(dets: list[dict], img_height: int) -> str | None:
+    """Return the first name-like line in the top NAME_ROI_RATIO band, else None."""
+    cutoff = img_height * Config.NAME_ROI_RATIO
+    header = [det for det in dets if det["yc"] < cutoff]
+    if not header:
+        return None
+    for row in group_into_lines(header):
+        label = normalise_text(line_text(row)).strip()
+        # A name has no digits, is 3-30 Latin/Arabic-block letters or spaces,
+        # and contains none of the _NAME_SKIP keywords.
+        is_name = (
+            not re.search(r"\d", label)
+            and re.fullmatch(r"[a-zA-Z\u0600-\u06FF\s]{3,30}", label)
+            and not _NAME_SKIP.search(label)
+        )
+        if is_name:
+            # Capitalise words that start with an ASCII character; keep the rest.
+            words = (w.capitalize() if w[:1].isascii() else w for w in label.split())
+            return " ".join(words)
+    return None
+
+# ---------------------------------------------------------------------------
+# Total extraction (keyword scan first, then bottom 25 % re-OCR)
+# ---------------------------------------------------------------------------
+
+def extract_total(
+    reader: easyocr.Reader,
+    bgr: np.ndarray,
+    all_dets: list[dict],
+    img_height: int,
+) -> tuple[float | None, bool]:
     """
-    Main orchestrator implementing 4-pass agentic processing.
-    
-    Pass 1: Vision Enhancement - Image preprocessing and quality improvement
-    Pass 2: Multi-Engine OCR - Text extraction with ensemble
-    Pass 3: Semantic Grounding - Lexicon-based correction and normalization
-    Pass 4: Agentic Math Loop - Validation, correction, and confidence scoring
-    
-    Enhanced with:
-    - HITL Feedback Learning
-    - Zero Data Retention Compliance
-    - Advanced Self-Correction
+    Returns (total_value, found_via_keyword).
+    Runs a dedicated re-OCR on the bottom 25 % with digit whitelist.
     """
-    
-    def __init__(self):
-        self.config = SystemConfig()
-        self.preprocessor = AdvancedImagePreprocessor(self.config)
-        self.ocr_engine = MultiEngineOCR(self.config)
-        # v6.0: Primary VLM engine (Qwen2-VL-2B-Instruct, CPU fp32)
-        self.vlm_engine = QwenVLMEngine(self.config)
-        self.parser = IntelligentParser(self.config)
-        self.calc_engine = AdvancedCalculationEngine(self.config)
-        self.validator = AgenticMathValidator(self.config)
-
-        # Enterprise features
-        self.feedback_learner = FeedbackLearner(max_feedback_items=500)
-        self.zdr_manager = ZDRCompliance()
-        self.self_corrector = AgenticSelfCorrector(self.feedback_learner)
-        
-    async def process(self, image_bytes: bytes) -> ProcessingResult:
-        """Main processing pipeline with enterprise features.
-        v6.0: Image-hash cache + concurrency semaphore + VLM pass-1A.
-        """
-        start_time = time.time()
-        request_id = hashlib.md5(image_bytes).hexdigest()[:16]
-        all_corrections = []
-
-        # ── Cache lookup (SHA-256 keyed, 1-hour TTL) ──────────────────────────
-        cached = _IMAGE_CACHE.get(image_bytes)
-        if cached is not None:
-            logger.info("[%s] Cache hit — returning cached result.", request_id)
-            return cached
-
-        # ── Concurrency guard: max 2 simultaneous requests ────────────────────
-        result: Optional[ProcessingResult] = None
-        async with _REQUEST_SEMAPHORE:
-            logger.info(
-                "[%s] Processing started | RSS=%.0f MB",
-                request_id, _get_rss_mb(),
-            )
-            try:
-                result = await self._process_inner(
-                    image_bytes, request_id, start_time, all_corrections
-                )
-            except Exception as outer_exc:
-                logger.error("[%s] Outer pipeline error: %s", request_id, outer_exc, exc_info=True)
-                result = ProcessingResult(
-                    request_id=request_id,
-                    success=False,
-                    status=ProcessingStatus.FAILED,
-                    errors=[str(outer_exc)],
-                    processing_time_ms=int((time.time() - start_time) * 1000),
-                )
-            finally:
-                _free_memory()
-                logger.info(
-                    "[%s] Memory after cleanup | RSS=%.0f MB",
-                    request_id, _get_rss_mb(),
-                )
+    # --- keyword scan in full dets first ---
+    keyword_total = _scan_keyword_total(all_dets)
+    if keyword_total is not None:
+        return keyword_total, True
 
-        if result is not None and result.success:
-            _IMAGE_CACHE.set(image_bytes, result)
-        return result or ProcessingResult(
-            request_id=request_id,
-            success=False,
-            status=ProcessingStatus.FAILED,
-            errors=["unknown_pipeline_failure"],
-            processing_time_ms=int((time.time() - start_time) * 1000),
+    # --- bottom-ROI re-OCR ---
+    roi_y = int(img_height * 0.75)
+    roi   = bgr[roi_y:, :]
+    try:
+        raw = reader.readtext(
+            roi,
+            text_threshold=0.10,
+            low_text=0.05,
+            allowlist="0123456789.,۰۱۲۳۴۵۶۷۸۹",
         )
+        candidates: list[float] = []
+        for (_, text, _) in raw:
+            norm = normalise_text(text)
+            nums = extract_numbers(norm)
+            candidates.extend(nums)
+        if candidates:
+            return max(candidates), False
+    except Exception as exc:
+        log.warning("Bottom-ROI OCR failed: %s", exc)
+
+    return None, False
+
+
+def _scan_keyword_total(dets: list[dict]) -> float | None:
+    """Return the largest number found alongside a total keyword, or None."""
+    def best(texts):  # largest number among texts that contain a total keyword
+        hits = [normalise_text(t) for t in texts]
+        nums = [n for t in hits if TOTAL_KW.search(t) for n in extract_numbers(t)]
+        return max(nums) if nums else None
+    total = best(d["text"] for d in dets)
+    if total is None and dets:
+        # Keyword and amount are often separate OCR boxes, so retry on whole lines.
+        total = best(line_text(ln) for ln in group_into_lines(list(dets)))
+    return total
+
+
+def detect_transaction_type(dets: list[dict]) -> str:
+    """Return the first matching type: udhaar, wasooli, cash; otherwise "unknown"."""
+    joined = " ".join(normalise_text(det["text"]) for det in dets)
+    by_priority = (
+        ("udhaar", UDHAAR_KW),
+        ("wasooli", WASOOLI_KW),
+        ("cash", CASH_KW),
+    )
+    return next((tag for tag, kw in by_priority if kw.search(joined)), "unknown")
+
+# ---------------------------------------------------------------------------
+# Master pipeline
+# ---------------------------------------------------------------------------
+
+def process_image(image_bytes: bytes) -> dict:
+    """Run the OCR pipeline on one receipt image and return the response dict.
+
+    The SHA-256 of *image_bytes* keys the cache and its first 16 hex characters are
+    the request id. A cache hit returns the stored dict as-is.
+    """
+    t0 = time.monotonic()
+    img_hash   = hashlib.sha256(image_bytes).hexdigest()
+    request_id = img_hash[:16]
+
+    # ---- cache lookup (full SHA-256 key) ----
+    cached = _cache.get(img_hash)
+    if cached is not None:
+        log.info("Cache HIT %s", img_hash[:12])
+        return cached
+
+    # ---- resize if too large (saves OCR time) ----
+    pil_img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+    w0, h0  = pil_img.size
+    if max(w0, h0) > Config.TARGET_WIDTH:
+        scale = Config.TARGET_WIDTH / max(w0, h0)
+        # Clamp to 1 px: an extreme aspect ratio must not give a zero-sized image.
+        new_size = (max(1, int(w0 * scale)), max(1, int(h0 * scale)))
+        pil_img  = pil_img.resize(new_size, Image.LANCZOS)
+
+    # ---- decode & auto-rotate ----
+    bgr = _auto_rotate(_pil_to_bgr(pil_img))
+    h   = bgr.shape[0]
+    reader = get_reader()
+
+    # ---- multi-variant OCR ----
+    variants = build_variants(bgr)
+    merged_dets = merge_ocr_results([_run_ocr(reader, v) for v in variants])
+    log.info("[%s] Merged %d detections from %d variants",
+             request_id[:8], len(merged_dets), len(variants))
+
+    if not merged_dets:
+        result = _empty_result(t0, request_id)
+        _cache.set(img_hash, result)
+        return result
+
+    # ---- customer name ----
+    customer_name = extract_customer_name(merged_dets, h)
+
+    # ---- line grouping (exclude top NAME_ROI for items) ----
+    name_cutoff = h * Config.NAME_ROI_RATIO
+    body_dets   = [d for d in merged_dets if d["yc"] >= name_cutoff]
+    lines       = group_into_lines(body_dets)
+
+    # ---- item parsing (total / udhaar / wasooli lines are not items) ----
+    items: list[dict] = []
+    for line in lines:
+        norm = normalise_text(line_text(line))
+        if TOTAL_KW.search(norm) or UDHAAR_KW.search(norm) or WASOOLI_KW.search(norm):
+            continue
+        item = parse_item_from_line(line)
+        if item and item["price"] > 0:
+            items.append(item)
+
+    # ---- total extraction (the found-via-keyword flag is not used here) ----
+    total_val, _ = extract_total(reader, bgr, merged_dets, h)
+    items_sum = round(sum(i["price"] * i["quantity"] for i in items), 2)
+
+    if total_val is None:
+        total_val = items_sum
+        mismatch  = False
+        log.info("[%s] No total found; summed items → %.2f", request_id[:8], total_val)
+    else:
+        tolerance = max(5.0, total_val * 0.05)
+        mismatch  = abs(total_val - items_sum) > tolerance
+
+    # ---- transaction type ----
+    tx_type = detect_transaction_type(merged_dets)
+
+    # ---- build response ----
+    elapsed_ms = round((time.monotonic() - t0) * 1000, 1)
+    result = {
+        "request_id":         request_id,
+        "success":            True,
+        "customer_name":      customer_name,
+        "items":              items,
+        "total":              round(float(total_val), 2),
+        "mismatch":           mismatch,
+        "transaction_type":   tx_type,
+        "processing_time_ms": elapsed_ms,
+        "item_count":         len(items),
+    }
+    _cache.set(img_hash, result)
+    gc.collect()
+    log.info("[%s] Done %.0f ms | items=%d | total=%.2f | mismatch=%s",
+             request_id[:8], elapsed_ms, len(items), total_val, mismatch)
+    return result
 
-    async def _process_inner(
-        self,
-        image_bytes: bytes,
-        request_id: str,
-        start_time: float,
-        all_corrections: list,
-    ) -> "ProcessingResult":
-        """Core extraction logic (called inside semaphore)."""
-        vlm_lines_used = False
-        vlm_total_hint: Optional[float] = None
 
-        try:
-            # Load image
-            pil_img = Image.open(io.BytesIO(image_bytes)).convert('RGB')
-            rgb = np.array(pil_img)
-
-            # ===== PASS 1: VISION ENHANCEMENT =====
-            logger.info("[%s] PASS 1: Vision Enhancement", request_id)
-            enhanced = self.preprocessor.enhance_image(rgb)
-            quality_metrics = self.preprocessor.analyze_image_quality(enhanced)
-            sharpness = quality_metrics.get('sharpness', 0.70)
-            mismatch = False
-
-            if sharpness < SystemConfig.AGGRESSIVE_PARSING_THRESHOLD:
-                logger.info("[%s] AGGRESSIVE MODE (sharpness=%.2f)", request_id, sharpness)
-
-            conf_thresh, text_thresh = SystemConfig.get_adaptive_thresholds(sharpness)
-            logger.info("[%s] Adaptive thresholds conf=%.2f text=%.2f", request_id, conf_thresh, text_thresh)
-
-            # ===== PASS 1A: VLM PRIMARY ENGINE (Qwen2-VL-2B-Instruct) =====
-            # Run on the original PIL image (richer colour info than processed grayscale).
-            # Falls back silently if VLM is disabled / fails / OOM.
-            vlm_raw: Optional[str] = None
-            if SystemConfig.ENABLE_VLM and not self.vlm_engine._disabled:
-                logger.info("[%s] PASS 1A: VLM extraction (Qwen2-VL)", request_id)
-                try:
-                    vlm_raw = await asyncio.wait_for(
-                        asyncio.to_thread(self.vlm_engine.extract, pil_img),
-                        timeout=SystemConfig.VLM_TIMEOUT_SECONDS,
-                    )
-                except asyncio.TimeoutError:
-                    logger.warning("[%s] VLM timed out after %.0fs — falling back to OCR.",
-                                   request_id, SystemConfig.VLM_TIMEOUT_SECONDS)
-                    all_corrections.append("vlm_timeout_fallback_to_ocr")
-                except Exception as vlm_exc:
-                    logger.warning("[%s] VLM error: %s — falling back.", request_id, vlm_exc)
-                    all_corrections.append(f"vlm_error_fallback: {vlm_exc}")
-
-            vlm_lines: List[str] = []
-            if vlm_raw:
-                vlm_lines, vlm_total_hint = QwenVLMEngine.parse_vlm_output(vlm_raw)
-                if vlm_lines:
-                    vlm_lines_used = True
-                    logger.info("[%s] VLM extracted %d lines (total_hint=%s)",
-                                request_id, len(vlm_lines), vlm_total_hint)
-                    all_corrections.append(f"vlm_extracted_{len(vlm_lines)}_lines")
-
-            # ===== PASS 2: MULTI-ENGINE OCR (fallback / supplement) =====
-            logger.info("[%s] PASS 2: Multi-Engine OCR", request_id)
-            lines, ocr_blocks = self.ocr_engine.extract_text_lines(enhanced)
-
-            # Merge VLM lines with OCR lines (VLM first = higher priority)
-            if vlm_lines:
-                seen_norm = {norm(x) for x in vlm_lines}
-                for ol in lines:
-                    k = norm(ol)
-                    if len(k) > 2 and k not in seen_norm:
-                        vlm_lines.append(ol)
-                        seen_norm.add(k)
-                lines = vlm_lines   # Use merged set
-
-            # Low-quality rescue pass
-            if sharpness < 0.55 or len(lines) < 4:
-                logger.info(
-                    f"[{request_id}] PASS 2B: Variant OCR rescue (sharpness={sharpness:.2f}, base_lines={len(lines)})"
-                )
-                merged_lines: List[str] = list(lines)
-                merged_blocks: List[OCRTextBlock] = list(ocr_blocks)
-                seen_line_keys = {norm(x) for x in merged_lines}
-                seen_block_keys = {
-                    f"{norm(b['text'])}:{int((b['bbox'][0][1] + b['bbox'][2][1]) / 2)}:{b['engine']}"
-                    for b in merged_blocks
-                    if b.get("bbox")
-                }
-                # Speed guard: don't over-ensemble. Use at most 2 variants and stop early.
-                for variant in self.preprocessor.generate_variants(enhanced)[:2]:
-                    v_lines, v_blocks = self.ocr_engine.extract_text_lines(variant)
-                    for vl in v_lines:
-                        k = norm(vl)
-                        if len(k) > 2 and k not in seen_line_keys:
-                            merged_lines.append(vl)
-                            seen_line_keys.add(k)
-                    for vb in v_blocks:
-                        try:
-                            k = f"{norm(vb['text'])}:{int((vb['bbox'][0][1] + vb['bbox'][2][1]) / 2)}:{vb['engine']}"
-                        except Exception:
-                            continue
-                        if k not in seen_block_keys:
-                            merged_blocks.append(vb)
-                            seen_block_keys.add(k)
-                    if len(merged_lines) >= 8 and len(merged_blocks) >= 20:
-                        break
-                lines, ocr_blocks = merged_lines, merged_blocks
-            
-            if not lines:
-                return ProcessingResult(
-                    request_id=request_id,
-                    success=False,
-                    status=ProcessingStatus.FAILED,
-                    errors=["No text detected in image"],
-                    processing_time_ms=int((time.time() - start_time) * 1000)
-                )
-            
-            # ========== PASS 3: SEMANTIC GROUNDING & PARSING ==========
-            logger.info(f"[{request_id}] PASS 3: Semantic Grounding")
-            customer_name, name_conf, name_warnings, name_hitl = self.parser.parse_customer_name(
-                lines,
-                ocr_blocks=ocr_blocks,
-                image_shape=enhanced.shape[:2],
-                top_roi_lines=self.ocr_engine.extract_top_roi_name_lines(
-                    enhanced, roi_ratio=SystemConfig.NAME_ROI_RATIO
-                ),
-            )
-            all_corrections.extend(name_warnings)
-            items, items_conf = self.parser.parse_items(lines)
-            total, total_conf = self.parser.parse_total(lines, items)
-            paper_total = float(total or 0.0)
-            items, gap_notes = self.parser.reconcile_items_from_ocr_gap(items, total, ocr_blocks)
-            all_corrections.extend(gap_notes)
-            items, total, calc_notes, calc_total_conf = self.calc_engine.reconcile(lines, items, total)
-            all_corrections.extend(calc_notes)
-            total_conf = max(total_conf, calc_total_conf)
-
-            # Fast mismatch rescue (only when needed):
-            # If total is missing or mismatch is high, OCR bottom ROI for totals and reconcile again.
-            if items:
-                item_sum = float(sum(i.quantity * i.price for i in items))
-                rel_err = abs(item_sum - float(total)) / max(item_sum, float(total), 1.0) if total > 0 else 1.0
-                if total <= 0 or rel_err > 0.12:
-                    bot_lines = self.ocr_engine.extract_bottom_roi_total_lines(enhanced, roi_ratio=0.38)
-                    if bot_lines:
-                        all_corrections.append(f"bottom_roi_total_micro_pass_lines_{len(bot_lines)}")
-                        mix_lines = list(lines) + bot_lines
-                        items, total, calc_notes2, calc_total_conf2 = self.calc_engine.reconcile(
-                            mix_lines, items, total
-                        )
-                        all_corrections.extend(calc_notes2)
-                        total_conf = max(total_conf, calc_total_conf2)
-            
-            # v5.2: FORCE RECONSTRUCTION if extraction still failing
-            if not items and total > 0 and len(lines) > 2:
-                logger.warning(f"[{request_id}] v5.2: Items extraction empty, FORCING reconstruction from all text lines")
-                # v5.2: Treat EVERY line containing a number as a potential item line
-                extracted_from_lines = []
-                for line in lines:
-                    line_lower = line.lower()
-                    skip_kw = ['date', 'total', 'udhaar', 'wasooli', 'cash', 'name', 'customer', 'time', 'receipt']
-                    if any(kw in line_lower for kw in skip_kw):
-                        continue
-                    # Find all numbers in this line
-                    nums = re.findall(r'\d+(?:\.\d+)?', line)
-                    if nums:
-                        # Extract text part (before the last number)
-                        text_part = re.sub(r'\d+(?:\.\d+)?', '', line).strip()
-                        # Get the last number as price
-                        try:
-                            price = float(nums[-1])
-                            if 1 <= price <= total and text_part:
-                                extracted_from_lines.append((text_part, price, line))
-                        except:
-                            pass
-                
-                # Create items from extracted lines
-                if extracted_from_lines:
-                    for idx, (text_part, price, orig_line) in enumerate(extracted_from_lines):
-                        item_name = normalize_ocr_text(text_part).lower()
-                        normalized_name, _ = PakistaniRetailLexicon.normalize_item_name(item_name)
-                        if not normalized_name or normalized_name == item_name:
-                            normalized_name = f"Uncategorized: {item_name[:20]}"
-                        
-                        items.append(ExtractedItem(
-                            name=normalized_name,
-                            quantity=1.0,
-                            price=price,
-                            unit="unit",
-                            confidence=0.55,  # Reconstruction confidence
-                            original_text=orig_line,
-                            semantic_match=None,
-                            semantic_score=0.0
-                        ))
-                    items_conf = 0.58
-                    logger.info(f"[{request_id}] v5.2: FORCE extracted {len(items)} items from all lines")
-                
-                # Also try reconstruction algorithm if force extraction didn't work
-                if not items:
-                    all_numbers = []
-                    for line in lines:
-                        nums = re.findall(r'\d+(?:\.\d+)?', line)
-                        for num in nums:
-                            try:
-                                val = float(num)
-                                if 1 < val < total:
-                                    all_numbers.append(val)
-                            except:
-                                pass
-                    
-                    reconstructed = reconstruct_items_from_total(total, all_numbers)
-                    if reconstructed:
-                        for idx, (qty, price) in enumerate(reconstructed):
-                            items.append(ExtractedItem(
-                                name=f"ReconstructedItem_{idx+1}",
-                                quantity=qty,
-                                price=price,
-                                unit="unit",
-                                confidence=0.50,
-                                original_text="",
-                                semantic_match=None,
-                                semantic_score=0.0
-                            ))
-                        items_conf = 0.55
-                        logger.info(f"[{request_id}] v5.2: Reconstructed {len(items)} items from total")
-            
-            # v5.1: Fallback logic - if items extraction still failed, return all numeric lines
-            if not items and len(lines) > 2:
-                logger.warning(f"[{request_id}] v5.1: Fallback - extracting all numeric lines")
-                # Extract all detected numbers as UnknownItem entries
-                all_numbers = []
-                for line in lines:
-                    nums = re.findall(r'\d+(?:\.\d+)?', line)
-                    for num in nums:
-                        try:
-                            val = float(num)
-                            if 1 < val < self.parser.config.MAX_REASONABLE_PRICE:
-                                all_numbers.append((val, line))
-                        except:
-                            pass
-                
-                # Create items from numeric lines
-                if all_numbers:
-                    for idx, (num_val, orig_line) in enumerate(all_numbers):
-                        items.append(ExtractedItem(
-                            name=f"UnknownItem_{idx+1}",
-                            quantity=1.0,
-                            price=num_val,
-                            unit="unit",
-                            confidence=0.35,
-                            original_text=orig_line,
-                            semantic_match=None,
-                            semantic_score=0.0
-                        ))
-                    items_conf = 0.40
-                    logger.info(f"[{request_id}] Fallback created {len(items)} items from numeric lines")
-            
-            # ========== PASS 4A: INTELLIGENT SELF-CORRECTION (Enhanced) ==========
-            logger.info(f"[{request_id}] PASS 4A: Intelligent Self-Correction")
-            
-            # Pipeline notes: every stage appends into all_corrections only (no parallel *corrections* lists).
-            items, _batch = self.self_corrector.apply_learned_corrections(items)
-            all_corrections.extend(_batch)
-
-            items, _batch = self.self_corrector.validate_item_prices(items)
-            all_corrections.extend(_batch)
-
-            corrected_total, _batch = self.self_corrector.apply_mathematical_corrections(items, total)
-            all_corrections.extend(_batch)
-
-            # ========== PASS 4B: AGENTIC MATH VALIDATION ==========
-            logger.info(f"[{request_id}] PASS 4B: Agentic Math Validation")
-            math_validated, final_total, _batch, final_total_conf = self.validator.validate(items, corrected_total)
-            all_corrections.extend(_batch)
-            
-            # Detect transaction type
-            full_text = ' '.join(lines)
-            tx_type, type_conf = PakistaniRetailLexicon.detect_transaction_type(full_text)
-            if tx_type == "cash":
-                tx_type = "wasooli"
-            if tx_type not in ("udhaar", "wasooli"):
-                tx_type = "udhaar" if any(k in full_text.lower() for k in ["udhaar", "ادھار", "بقایا"]) else "wasooli"
-            
-            # v5.2: Post-correction confidence boosting
-            logger.info(f"[{request_id}] v5.2: Post-correction confidence boosting")
-            for item in items:
-                # Boost confidence if math validates
-                if math_validated:
-                    item.confidence += SystemConfig.CONF_BOOST_MATH
-                    item.confidence = min(0.98, item.confidence)
-                
-                # Boost for lexicon matches
-                if item.semantic_match and item.semantic_score > 0.7:
-                    item.confidence += SystemConfig.CONF_BOOST_LEXICON
-                    item.confidence = min(0.98, item.confidence)
-            
-            # Recalculate average confidence
-            items_conf = sum(item.confidence for item in items) / len(items) if items else 0.5
-
-            # Final math rescue for medium mismatch: prefer internally consistent item sum (line totals).
-            if items and final_total > 0:
-                item_sum = sum(float(i.price) for i in items)
-                rel_gap = abs(item_sum - final_total) / max(item_sum, final_total, 1.0)
-                if not math_validated and rel_gap <= 0.30:
-                    all_corrections.append(f"post_pass_total_aligned_to_items_{final_total}_to_{item_sum}")
-                    final_total = float(item_sum)
-                    math_validated = True
-
-            # Hard guarantee: never return a total that doesn't equal sum(qty * price).
-            if items:
-                computed_sum = float(sum(float(i.price) for i in items))
-                if computed_sum > 0:
-                    if abs(float(final_total or 0.0) - computed_sum) > max(2.0, 0.01 * computed_sum):
-                        all_corrections.append(
-                            f"final_total_overridden_to_computed_sum_from_{round(float(final_total or 0.0),2)}_to_{round(computed_sum,2)}"
-                        )
-                        final_total = float(computed_sum)
-                    # If paper total differs, flag warning but still keep computed sum.
-                    if paper_total > 0 and abs(paper_total - computed_sum) > max(2.0, 0.01 * computed_sum):
-                        all_corrections.append(
-                            f"paper_total_mismatch_paper_{round(paper_total,2)}_computed_{round(computed_sum,2)}"
-                        )
-                        mismatch = True
-                    else:
-                        mismatch = False
-                    math_validated = True
-
-            # Output shape hardening for mobile form mapping.
-            customer_name = (customer_name or "").strip() or None
-            for it in items:
-                if it.unit not in ("kg", "g", "liter", "ml", "dozen", "pc", "packet"):
-                    it.unit = "pc"
-                if it.quantity <= 0:
-                    it.quantity = 1.0
-                if it.price <= 0:
-                    it.price = 1.0
-
-            confidence = self.validator.calculate_confidence(items, math_validated, final_total)
-            confidence['semantic'] = items_conf
-            confidence['extraction'] = total_conf
-            confidence['calc_engine'] = round(float(total_conf), 3)
-            
-            # v5.2: Add quality metrics to confidence
-            confidence['image_quality'] = sharpness
-            confidence['parsing_mode'] = 'aggressive' if sharpness < SystemConfig.AGGRESSIVE_PARSING_THRESHOLD else 'standard'
-            
-            # v5.2: Determine items extraction source for metadata
-            uncategorized_count = sum(1 for item in items if "Uncategorized:" in item.name)
-            fallback_used = any("UnknownItem_" in item.name for item in items)
-            reconstruction_used = any("ReconstructedItem_" in item.name for item in items)
-            force_extracted = any("Uncategorized:" in item.name for item in items)
-            
-            # Determine extraction method
-            if reconstruction_used:
-                extraction_method = "reconstruction"
-            elif force_extracted:
-                extraction_method = "force_extraction_from_lines"
-            elif fallback_used:
-                extraction_method = "numeric_fallback"
-            else:
-                extraction_method = "standard_parsing"
-            
-            # Create result
-            result = ProcessingResult(
-                request_id=request_id,
-                success=True,
-                customer_name=customer_name,
-                hitl_data=name_hitl,
-                items=items,
-                total_amount=final_total,
-                transaction_type=tx_type,
-                mismatch=bool(mismatch),
-                confidence=confidence,
-                processing_time_ms=int((time.time() - start_time) * 1000),
-                status=ProcessingStatus.COMPLETED,
-                metadata={
-                    "lines_extracted": len(lines),
-                    "ocr_blocks": len(ocr_blocks),
-                    "items_extracted_from_ocr_blocks": len(items) > 0,
-                    "items_count": len(items),
-                    "uncategorized_items_count": uncategorized_count,
-                    "quality_metrics": quality_metrics,
-                    "corrections": all_corrections,
-                    "math_validated": math_validated,
-                    "hitl_learning_active": True,
-                    "fallback_used": fallback_used,
-                    "reconstruction_used": reconstruction_used,
-                    "force_extracted_from_lines": force_extracted,
-                    "items_extraction_method": extraction_method,
-                    "v5_2_aggressive_mode": sharpness < SystemConfig.AGGRESSIVE_PARSING_THRESHOLD,
-                    "image_sharpness": sharpness,
-                    "adaptive_thresholds_applied": True,
-                    "easyocr_rows_skipped": self.ocr_engine.easyocr_rows_skipped,
-                    "paddle_rows_skipped": self.ocr_engine.paddle_rows_skipped,
-                    "calc_engine_enabled": True,
-                    "calc_engine_notes_count": len([c for c in all_corrections if "calc_engine" in c or "single_item_price_repair" in c]),
-                    # v6.0 VLM metadata
-                    "vlm_enabled": SystemConfig.ENABLE_VLM,
-                    "vlm_model": SystemConfig.VLM_MODEL_ID,
-                    "vlm_lines_used": vlm_lines_used,
-                    "vlm_total_hint": vlm_total_hint,
-                    "engine_used": "vlm+ensemble" if vlm_lines_used else ("ensemble" if (EASYOCR_AVAILABLE or SystemConfig.ENABLE_PADDLE) else "easyocr"),
-                    "cache_stats": _IMAGE_CACHE.stats(),
-                    "rss_mb": round(_get_rss_mb(), 1),
-                }
-            )
-            
-            if all_corrections:
-                result.warnings = all_corrections
-
-            # Store for ZDR-compliant access (expires in 1 hour)
-            self.zdr_manager.store_request_data(request_id, result)
-
-            logger.info(
-                "[%s] Completed in %dms | VLM=%s | Corrections=%d | RSS=%.0fMB",
-                request_id, result.processing_time_ms,
-                "yes" if vlm_lines_used else "no",
-                len(all_corrections), _get_rss_mb(),
-            )
-            return result
-
-        except Exception as e:
-            logger.error("[%s] Processing failed: %s", request_id, e, exc_info=True)
-            return ProcessingResult(
-                request_id=request_id,
-                success=False,
-                status=ProcessingStatus.FAILED,
-                errors=[str(e)],
-                processing_time_ms=int((time.time() - start_time) * 1000)
-            )
-
-
-# ============================================================================
-# FASTAPI APPLICATION
-# ============================================================================
+def _empty_result(t0: float, request_id: str = "") -> dict:
+    """Build an unsuccessful, item-less response payload.
+
+    ``t0`` is expected to be a ``time.monotonic()`` start stamp; the elapsed
+    time since then is reported in milliseconds, rounded to one decimal.
+    """
+    elapsed_ms = round((time.monotonic() - t0) * 1000, 1)
+    return dict(
+        request_id=request_id, success=False, customer_name=None,
+        items=[], total=0.0, mismatch=False, transaction_type="unknown",
+        processing_time_ms=elapsed_ms, item_count=0,
+    )
 
+# ---------------------------------------------------------------------------
+# FastAPI app
+# ---------------------------------------------------------------------------
 app = FastAPI(
-    title="Smart Parchi OCR Enterprise",
-    description="Professional Urdu-English handwritten receipt processing system",
-    version="6.0.0"
+    title="Parchi OCR – Minimal CPU Edition",
+    description="Handwritten Urdu/English receipt OCR. No GPU. No VLM.",
+    version="1.0.0",
 )
 
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
-    allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
-# Global orchestrator
-orchestrator = ParchiOrchestrator()
-
-
-# ============================================================================
-# API ENDPOINTS
-# ============================================================================
 
 @app.on_event("startup")
-async def startup_event():
-    """v6.0: Warm up EasyOCR/Paddle on startup. VLM loads lazily on first request."""
-    global OCR_WARMUP_STATUS
-    SystemConfig.validate()
-    logger.info("Smart Parchi OCR v6.0.0 started")
-    logger.info(
-        "Engines | VLM=%s(%s) PADDLE=%s EASYOCR=%s TORCH=%s TRANSFORMERS=%s",
-        SystemConfig.ENABLE_VLM,
-        SystemConfig.VLM_MODEL_ID,
-        SystemConfig.ENABLE_PADDLE,
-        EASYOCR_AVAILABLE,
-        TORCH_AVAILABLE,
-        TRANSFORMERS_AVAILABLE,
-    )
-    logger.info("Startup RSS: %.0f MB", _get_rss_mb())
-
-    if os.getenv("SKIP_OCR_WARMUP", "0").strip() in ("1", "true", "yes"):
-        logger.warning("SKIP_OCR_WARMUP=1 — OCR will init on first request (slower first hit).")
-        OCR_WARMUP_STATUS = "skipped"
-    else:
+async def _warmup():
+    """Start the EasyOCR reader load as a background task so startup returns at once."""
+    async def _load():
         try:
-            # Warm up EasyOCR + Paddle (downloads models if not cached).
-            # VLM (Qwen2-VL) loads lazily on first inference request to keep
-            # startup fast and let HF health probe pass before 8 GB loads.
-            await asyncio.to_thread(orchestrator.ocr_engine.initialize)
-            logger.info("OCR ensemble warm-up finished. RSS: %.0f MB", _get_rss_mb())
-            OCR_WARMUP_STATUS = "complete"
+            await asyncio.to_thread(get_reader)
+            log.info("Warmup complete.")
         except Exception as exc:
-            logger.error("OCR warm-up failed (will retry on first request): %s", exc, exc_info=True)
-            OCR_WARMUP_STATUS = "failed"
-
-    logger.info(
-        "v6.0 ready | cache_ttl=%ds max_entries=%d concurrency_limit=2",
-        SystemConfig.CACHE_TTL,
-        SystemConfig.MAX_CACHE_SIZE,
-    )
+            log.exception("Warmup failed: %s", exc)  # best-effort: log with traceback, never raise
+    app.state.warmup_task = asyncio.create_task(_load())  # keep a strong ref; the loop holds tasks weakly
 
 
-@app.get("/health")
-async def health_check() -> Dict[str, Any]:
-    """Health check endpoint"""
-    paddle_ready = bool(PADDLE_AVAILABLE and SystemConfig.ENABLE_PADDLE)
+@app.get("/", tags=["health"])
+def root():
     return {
-        "status": "healthy",
-        "version": "6.0.0",
-        "timestamp": datetime.now().isoformat(),
-        "warmup_status": OCR_WARMUP_STATUS,
-        "features": {
-            "vision_enhancement": True,
-            "multi_engine_ocr": True,
-            "semantic_lexicon": True,
-            "agentic_math": True,
-            "vlm_enabled": SystemConfig.ENABLE_VLM,
-            "vlm_model": SystemConfig.VLM_MODEL_ID if SystemConfig.ENABLE_VLM else None,
-            "vlm_loaded": getattr(orchestrator.vlm_engine, "_loaded", False),
-            "vlm_disabled": getattr(orchestrator.vlm_engine, "_disabled", False),
-            "enable_paddle": SystemConfig.ENABLE_PADDLE,
-            "paddle_available": bool(PADDLE_AVAILABLE and SystemConfig.ENABLE_PADDLE),
-            "easyocr_available": EASYOCR_AVAILABLE,
-            "ocr_initialized": getattr(orchestrator.ocr_engine, "_initialized", False),
-            "tesseract_available": TESSERACT_AVAILABLE,
-            "sklearn_available": SKLEARN_AVAILABLE,
-            "torch_available": TORCH_AVAILABLE,
-            "transformers_available": TRANSFORMERS_AVAILABLE,
-            "cache_stats": _IMAGE_CACHE.stats(),
-            "rss_mb": round(_get_rss_mb(), 1),
-        }
+        "service": "parchi-ocr-minimal",
+        "status":  "ok",
+        "time_utc": datetime.now(timezone.utc).isoformat(),
     }
 
-@app.get("/")
-async def root() -> Dict[str, Any]:
-    """Stop noisy GET / 404 logs (HF/health probes)."""
-    return {"ok": True, "service": "smart-parchi-ocr", "version": "6.0.0"}
-
 
-@app.post("/process-parchi", response_model=ProcessingResult)
-async def process_parchi(
-    image: UploadFile = File(...),
-    background_tasks: BackgroundTasks = None
-) -> ProcessingResult:
-    """
-    Process a single parchi image.
-    
-    Accepts image files (JPEG, PNG, etc.) and returns structured extraction.
-    """
-    # Validate file
-    if not image.content_type or not image.content_type.startswith("image/"):
-        raise HTTPException(status_code=400, detail="File must be an image")
-    
-    # Read image
-    contents = await image.read()
-    if len(contents) > SystemConfig.MAX_IMAGE_SIZE_MB * 1024 * 1024:
-        raise HTTPException(status_code=400, detail=f"Image too large (max {SystemConfig.MAX_IMAGE_SIZE_MB}MB)")
-    
-    # Process with hard timeout guard so UI never hangs indefinitely.
-    try:
-        result = await asyncio.wait_for(
-            orchestrator.process(contents),
-            timeout=float(SystemConfig.FASTAPI_HARD_TIMEOUT_SECONDS),
-        )
-    except asyncio.TimeoutError:
-        return ProcessingResult(
-            request_id=hashlib.md5(contents).hexdigest()[:16],
-            success=False,
-            status=ProcessingStatus.FAILED,
-            errors=["timeout_error"],
-            warnings=[f"processing_exceeded_{int(SystemConfig.FASTAPI_HARD_TIMEOUT_SECONDS)}s"],
-            processing_time_ms=int(SystemConfig.FASTAPI_HARD_TIMEOUT_SECONDS * 1000),
-            metadata={
-                "timeout_seconds": float(SystemConfig.FASTAPI_HARD_TIMEOUT_SECONDS),
-                "filename": image.filename,
-                "content_type": image.content_type,
-            },
-        )
-    
-    # Add file metadata
-    result.metadata["filename"] = image.filename
-    result.metadata["content_type"] = image.content_type
-    # Backward/forward compatibility for mobile clients:
-    # provide both legacy and modern keys so field mapping never breaks.
-    result.total = float(result.total_amount or 0.0)
-    result.type = str(result.transaction_type or "unknown")
-    result.meta = dict(result.metadata or {})
-    try:
-        c = dict(result.confidence or {})
-        overall = float(c.get("overall") or 0.0)
-        if overall <= 0:
-            parts = [float(c.get("items") or 0.0), float(c.get("total") or 0.0), float(c.get("type") or 0.0)]
-            overall = max(0.0, min(1.0, sum(parts) / max(1, len(parts))))
-        result.confidence_score = max(0.0, min(1.0, overall))
-    except Exception:
-        result.confidence_score = 0.0
-    ui_items = [
-        {
-            "item": str(it.name or "").strip(),
-            "qty": str(float(it.quantity or 0.0)).rstrip("0").rstrip(".") or "0",
-            "price": str(float(it.price or 0.0)).rstrip("0").rstrip(".") or "0",
-        }
-        for it in (result.items or [])
-        if str(it.name or "").strip() or float(it.quantity or 0.0) > 0 or float(it.price or 0.0) > 0
-    ]
-    result.items_list = ui_items
-    result.line_items = list(ui_items)
-    
-    return result
-
-
-@app.post("/process-batch")
-async def process_batch(
-    images: List[UploadFile],
-    background_tasks: BackgroundTasks
-) -> List[ProcessingResult]:
-    """
-    Process multiple images in batch.
-    Limited to 5 images per request for performance.
-    """
-    if len(images) > 5:
-        raise HTTPException(status_code=400, detail="Maximum 5 images per batch")
-    
-    results = []
-    for img in images[:5]:
-        contents = await img.read()
-        result = await orchestrator.process(contents)
-        results.append(result)
-    
-    return results
-
-
-@app.get("/lexicon")
-async def get_lexicon() -> Dict[str, List[str]]:
-    """Get the semantic lexicon for reference"""
-    return PakistaniRetailLexicon.LEXICON
-
-
-# ============================================================================
-# HUMAN-IN-THE-LOOP (HITL) FEEDBACK ENDPOINTS
-# ============================================================================
-
-@app.post("/feedback/submit")
-async def submit_feedback(feedback: UserFeedback) -> Dict[str, Any]:
-    """
-    Submit user corrections to improve the model.
-    Corrections are immediately used to fine-tune future predictions.
-    
-    PRIVACY: Data is not persisted (Zero Data Retention compliant).
-    """
-    if not feedback.timestamp:
-        feedback.timestamp = datetime.now().isoformat()
-    
-    result = orchestrator.feedback_learner.add_feedback(feedback)
-    
-    return {
-        **result,
-        "privacy_note": "Your feedback is immediately processed but not stored persistently",
-        "your_request_id": feedback.request_id
-    }
-
-
-@app.get("/feedback/insights")
-async def get_feedback_insights() -> Dict[str, Any]:
-    """
-    Get insights into common OCR errors and learned patterns.
-    Useful for understanding system performance and high-error areas.
-    """
-    insights = orchestrator.feedback_learner.get_error_insights()
-    learned_corrections = orchestrator.feedback_learner.get_high_confidence_corrections()
-    
+@app.get("/health", tags=["health"])
+def health():  # health probe: static service info plus the current size of the cache store
     return {
-        **insights,
-        "learned_corrections_active": len(learned_corrections),
-        "common_items_learned": list(learned_corrections.keys())[:10]
+        "status":     "ok",
+        "version":    "1.1.0",  # NOTE(review): FastAPI(version=...) in this file says "1.0.0" — reconcile
+        "engine":     "EasyOCR [ur, en]",
+        "cache_size": len(_cache._store),  # reaches into the cache's private _store
     }
 
 
-@app.get("/feedback/status")
-async def get_feedback_status() -> Dict[str, Any]:
-    """Get real-time feedback system status"""
-    status = {
-        "learning_active": True,
-        "total_corrections_aggregated": orchestrator.feedback_learner.total_corrections,
-        "unique_items_learned": len(orchestrator.feedback_learner.feedback_history),
-        "error_patterns_detected": len(orchestrator.feedback_learner.error_patterns),
-        "last_cleared": orchestrator.feedback_learner.last_cleared.isoformat()
-    }
-    return status
-
-
-# ============================================================================
-# COMPLIANCE & PRIVACY ENDPOINTS
-# ============================================================================
-
-@app.get("/compliance/zdr-status")
-async def get_zdr_status() -> Dict[str, Any]:
-    """
-    Zero Data Retention (ZDR) compliance status.
-    Ensures no sensitive financial data is persisted.
-    """
-    return orchestrator.zdr_manager.get_compliance_status()
-
-
-@app.post("/compliance/cleanup")
-async def manual_zdr_cleanup() -> Dict[str, Any]:
-    """
-    Manually trigger data cleanup (normally automatic).
-    Removes all cached request data older than specified hours.
-    """
-    result = orchestrator.zdr_manager._cleanup_expired()
-    return {
-        "status": "manual_cleanup_executed",
-        "expired_requests_removed": result,
-        "remaining_cached": len(orchestrator.zdr_manager.request_cache)
-    }
-
+async def _handle_upload(file: UploadFile) -> dict:
+    """Validate an upload (non-image → 400, oversize → 413), OCR it off-loop; failures → 500."""
+    if file.content_type and not file.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="File must be an image.")
+    max_bytes = int(Config.MAX_IMAGE_SIZE_MB * 1024 * 1024)
+    image_bytes = await file.read(max_bytes + 1)  # bounded read: limit + 1 byte proves oversize
+    if len(image_bytes) > max_bytes:
+        raise HTTPException(status_code=413,
+                            detail=f"Image too large (max {Config.MAX_IMAGE_SIZE_MB} MB).")
+    try:  # CPU-heavy OCR runs in a worker thread so the event loop stays responsive
+        return await asyncio.to_thread(process_image, image_bytes)
+    except Exception as exc:
+        log.exception("Processing error")
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
 
-@app.get("/feedback/clear-old-data")
-async def clear_old_feedback_data(hours: int = 24) -> Dict[str, Any]:
-    """
-    Clear old feedback data (ZDR compliance).
-    Default: Clear data older than 24 hours.
-    """
-    return orchestrator.feedback_learner.clear_old_data(hours=hours)
 
+@app.post("/ocr", tags=["ocr"])
+async def ocr_endpoint(file: UploadFile = File(...)):
+    """OCR the parchi image in form field "file" → structured JSON (items, total, customer_name …)."""
+    return await _handle_upload(file)  # raises 400 (non-image), 413 (too large), 500 (OCR failure)
 
-# ============================================================================
-# LEXICON MANAGEMENT (Enhanced)
-# ============================================================================
 
-@app.post("/lexicon/add-from-feedback")
-async def add_items_from_feedback() -> Dict[str, Any]:
-    """
-    Create extended lexicon from high-confidence learned corrections.
-    Useful for creating domain-specific custom lexicons.
-    """
-    learned = orchestrator.feedback_learner.get_high_confidence_corrections()
-    
-    extended_lexicon = {}
-    for item_name, values in learned.items():
-        if item_name not in PakistaniRetailLexicon.LEXICON:
-            extended_lexicon[item_name] = {
-                "price": values["price"],
-                "quantity": values["qty"],
-                "frequency": len(orchestrator.feedback_learner.feedback_history[item_name])
-            }
-    
-    return {
-        "status": "lexicon_extension_ready",
-        "new_items": len(extended_lexicon),
-        "items": extended_lexicon,
-        "note": "Use this to export and deploy custom lexicons"
-    }
+@app.post("/process-parchi", tags=["ocr"])
+async def process_parchi(image: UploadFile = File(...)):
+    """Alias for /ocr (upload goes in form field "image") – compatible with reference API clients."""
+    return await _handle_upload(image)  # same shared handler as /ocr
 
 
-# ============================================================================
-# RETRIEVAL ENDPOINTS
-# ============================================================================
+@app.delete("/cache", tags=["admin"])
+def clear_cache():
+    """Clear the in-memory cache (useful for testing); responds with {"cleared": True}."""
+    _cache._store.clear()  # NOTE(review): touches the private store directly — confirm no lock is needed
+    return {"cleared": True}
 
-@app.get("/result/{request_id}")
-async def retrieve_result(request_id: str) -> Optional[ProcessingResult]:
-    """
-    Retrieve cached result by request ID (within TTL).
-    Useful for async processing workflows.
-    """
-    result = orchestrator.zdr_manager.retrieve_request_data(request_id)
-    
-    if result is None:
-        raise HTTPException(
-            status_code=404,
-            detail=f"Request {request_id} not found or expired (TTL: {orchestrator.zdr_manager.cache_ttl}s)"
-        )
-    
-    return result
 
+@app.get("/cache/stats", tags=["admin"])
+def cache_stats():  # current entry count plus the configured cap and TTL (seconds, per the _S suffix)
+    return {"entries": len(_cache._store), "max": _CACHE_MAX, "ttl_s": _CACHE_TTL_S}
 
-# ============================================================================
-# MAIN ENTRY POINT
-# ============================================================================
 
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
 if __name__ == "__main__":
-    import uvicorn
-    
-    print("""
-    ╔═══════════════════════════════════════════════════════════════════════════╗
-    ║          SMART PARCHI OCR v5.0.0 - ENTERPRISE EDITION                     ║
-    ║      Intelligent Receipt Processing with HITL Learning                    ║
-    ╠═══════════════════════════════════════════════════════════════════════════╣
-    ║                                                                           ║
-    ║  CORE FEATURES:                                                           ║
-    ║  ✅ 7-layer Vision Enhancement Pipeline                                   ║
-    ║  ✅ Multi-Engine OCR (EasyOCR + PaddleOCR + Fallback)                     ║
-    ║  ✅ Urdu-English Bilingual Support (Nastaliq & Naskh)                    ║
-    ║  ✅ Pakistani Retail Semantic Lexicon (50+ items with Urdu variants)      ║
-    ║  ✅ Agentic 4-Pass Self-Correction Loop                                   ║
-    ║  ✅ Mathematical Validation & Auto-Correction                             ║
-    ║  ✅ Confidence Scoring with Explainable AI                                ║
-    ║                                                                           ║
-    ║  ENTERPRISE FEATURES (NEW):                                               ║
-    ║  ✅ Human-In-The-Loop (HITL) Feedback Learning System                     ║
-    ║  ✅ Zero Data Retention (ZDR) Compliance - HIPAA Ready                    ║
-    ║  ✅ Intelligent Pattern Detection from User Corrections                   ║
-    ║  ✅ Real-time Error Insights & Analytics                                  ║
-    ║  ✅ Adaptive Price Validation Against Retail Knowledge                    ║
-    ║  ✅ Multimodal Reasoning (Vision + Semantic + Math)                       ║
-    ║  ✅ Privacy-First Architecture (No Persistent Storage)                    ║
-    ║                                                                           ║
-    ║  DEPLOYMENT SPECS:                                                        ║
-    ║  • Optimized for Hugging Face Spaces (CPU Tier)                          ║
-    ║  • Memory Usage: 700-900MB (2GB limit compatible)                        ║
-    ║  • Processing Time: 2-5 seconds per image                                ║
-    ║  • Batch Processing: Up to 5 images per request                          ║
-    ║  • Auto-cleanup: Data expires after 1 hour (ZDR)                         ║
-    ║                                                                           ║
-    ║  API ENDPOINTS:                                                           ║
-    ║  POST   /process-parchi           - Process single receipt               ║
-    ║  POST   /process-batch            - Batch process (max 5)                ║
-    ║  POST   /feedback/submit          - Submit corrections (HITL)            ║
-    ║  GET    /feedback/insights        - Get error patterns                   ║
-    ║  GET    /compliance/zdr-status    - Privacy compliance                   ║
-    ║  GET    /result/{request_id}      - Retrieve cached result               ║
-    ║                                                                           ║
-    ╚═══════════════════════════════════════════════════════════════════════════╝
-    """)
-    
-    uvicorn.run(
-        app,
-        host="0.0.0.0",
-        port=8000,
-        log_level="info"
-    )
\ No newline at end of file
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, workers=1, log_level="info")  # all interfaces, one worker