Spaces:

Um34ER
/

bazaar-bridge-ocr

Running

App Files Files Community

bazaar-bridge-ocr / app.py

Um34ER

Update app.py

267da03 verified 14 days ago

raw

history blame contribute delete

60.3 kB

	'''Smart Parchi OCR v7 — Local Hybrid Architecture'''
	from __future__ import annotations
	import asyncio
	import gc
	import hashlib
	import io
	import logging
	import os
	import re
	import threading
	import time
	import uuid
	import warnings
	from typing import Any, Dict, List, Optional, Tuple
	import cv2
	import numpy as np
	from fastapi import FastAPI, File, HTTPException, UploadFile
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from PIL import Image
	from rapidfuzz import fuzz, process as fuzz_process
	"""Pakistani Grocery Lexicon & Urdu-to-English Transliteration Map.

	This module provides:
	1. URDU_TO_ENGLISH: Direct transliteration of common Urdu grocery terms.
	2. GROCERY_LEXICON: Canonical grocery items for fuzzy auto-correction.
	3. COMMON_MISSPELLINGS: Maps mangled OCR output to correct English names.
	4. TRANSACTION_KEYWORDS: Urdu/English cues for udhaar/cash/return detection.
	5. UNIT_MAP: Normalizes unit strings (kilogram -> kg, dz -> dozen, etc.).
	"""


	# ── Urdu → English Transliteration (common parchi items) ─────────────────────
	# Maps an Urdu grocery term to its romanized name. Keys are matched as plain
	# substrings by transliterate_urdu(); multi-word phrases ("کالی مرچ") coexist
	# with their single-word parts ("مرچ"). Each key must appear only once.
	URDU_TO_ENGLISH: dict[str, str] = {
	    # Staples
	    "آٹا": "Atta",
	    "چاول": "Chawal",
	    "دال": "Daal",
	    "چنے": "Chanay",
	    "مسور": "Masoor",
	    "ماش": "Maash",
	    "بیسن": "Besan",
	    "میدہ": "Maida",
	    "سوجی": "Suji",
	    # Sugar & Salt
	    "چینی": "Cheeni",
	    "گڑ": "Gur",
	    "نمک": "Namak",
	    "شکر": "Shakar",
	    # Oils & Ghee
	    "تیل": "Tel",
	    "گھی": "Ghee",
	    "بناسپتی": "Banaspati",
	    "مکھن": "Makkhan",
	    # Spices
	    "مرچ": "Mirch",
	    "ہلدی": "Haldi",
	    "دھنیا": "Dhaniya",
	    "زیرہ": "Zeera",
	    "اجوائن": "Ajwain",
	    "کالی مرچ": "Kali Mirch",
	    "لال مرچ": "Lal Mirch",
	    "گرم مصالحہ": "Garam Masala",
	    "ادرک": "Adrak",
	    "لہسن": "Lehsun",
	    "پیاز": "Piyaz",
	    "ٹماٹر": "Tamatar",
	    "آلو": "Aloo",
	    # Dairy
	    "دودھ": "Doodh",
	    "دہی": "Dahi",
	    "پنیر": "Paneer",
	    "کریم": "Cream",
	    "لسی": "Lassi",
	    # Beverages
	    "چائے": "Chai",
	    "پانی": "Paani",
	    # Meat & Protein
	    "گوشت": "Gosht",
	    "مرغی": "Murghi",
	    "مچھلی": "Machhli",
	    "انڈے": "Anday",
	    "قیمہ": "Qeema",
	    # Bread & Bakery
	    "روٹی": "Roti",
	    "نان": "Naan",
	    "ڈبل روٹی": "Double Roti",
	    "بسکٹ": "Biscuit",
	    "کیک": "Cake",
	    # Fruits & Vegetables
	    "سیب": "Seb",
	    "کیلا": "Kela",
	    "انگور": "Angoor",
	    "آم": "Aam",
	    "گاجر": "Gajar",
	    "مٹر": "Matar",
	    "بھنڈی": "Bhindi",
	    "گوبھی": "Gobhi",
	    "پالک": "Palak",
	    "بینگن": "Baingan",
	    # Miscellaneous ("تیل" is already listed under Oils & Ghee; the repeated
	    # entry that used to sit here was a duplicate dict key and was dropped)
	    "صابن": "Sabun",
	    "سرکہ": "Sirka",
	    "اچار": "Achaar",
	    "چٹنی": "Chutney",
	    "برگر": "Burger",
	    "سموسہ": "Samosa",
	    "پراٹھا": "Paratha",
	    "بریانی": "Biryani",
	    # Snacks & Packaged
	    "چپس": "Chips",
	    "نوڈلز": "Noodles",
	    "جوس": "Juice",
	    "کولڈ ڈرنک": "Cold Drink",
	    "پیپسی": "Pepsi",
	    "کوکا کولا": "Coca Cola",
	}

	# ── Canonical grocery item list (for fuzzy matching) ─────────────────────────
	# Canonical item names used as the choice list for fuzzy matching.
	# Order is preserved from the original definition.
	GROCERY_LEXICON: list[str] = [
	    # Staples
	    "Atta", "Chawal", "Daal", "Chanay", "Masoor", "Maash", "Besan", "Maida", "Suji",
	    # Sugar & salt
	    "Cheeni", "Gur", "Namak", "Shakar",
	    # Oils & ghee
	    "Tel", "Ghee", "Banaspati", "Makkhan",
	    # Spices and everyday vegetables
	    "Mirch", "Haldi", "Dhaniya", "Zeera", "Ajwain",
	    "Kali Mirch", "Lal Mirch", "Garam Masala",
	    "Adrak", "Lehsun", "Piyaz", "Tamatar", "Aloo",
	    # Dairy
	    "Doodh", "Dahi", "Paneer", "Cream", "Lassi",
	    # Beverages
	    "Chai", "Paani",
	    # Meat & protein
	    "Gosht", "Murghi", "Machhli", "Anday", "Qeema",
	    # Bread & bakery
	    "Roti", "Naan", "Double Roti", "Biscuit", "Cake", "Bread",
	    # Fruits & vegetables
	    "Seb", "Kela", "Angoor", "Aam", "Gajar", "Matar",
	    "Bhindi", "Gobhi", "Palak", "Baingan",
	    # Miscellaneous and prepared food
	    "Sabun", "Sirka", "Achaar", "Chutney",
	    "Burger", "Samosa", "Paratha", "Biryani",
	    # Snacks & packaged
	    "Chips", "Noodles", "Juice", "Cold Drink", "Pepsi", "Coca Cola",
	    # English names
	    "Rice", "Sugar", "Salt", "Oil", "Butter", "Milk", "Eggs", "Chicken",
	    "Mutton", "Fish", "Flour", "Potato", "Onion", "Tomato", "Ginger", "Garlic",
	    "Water", "Tea", "Coffee", "Soap", "Detergent", "Shampoo",
	]

	# ── OCR Misspelling Auto-Correction ──────────────────────────────────────────
	# Exact-match lookup from a lower-cased OCR token to the canonical item name.
	# Keys must be lower-case because correct_item_name() lower-cases its input
	# before the lookup. Each key appears once (the repeated "dal" was removed).
	COMMON_MISSPELLINGS: dict[str, str] = {
	    "bubiger": "Burger", "buger": "Burger", "brger": "Burger",
	    "ata": "Atta", "aata": "Atta", "tta": "Atta",
	    "cheni": "Cheeni", "chini": "Cheeni", "cheeni": "Cheeni",
	    "chaval": "Chawal", "chawl": "Chawal", "chwal": "Chawal",
	    "dal": "Daal", "daal": "Daal",
	    "gee": "Ghee", "ghi": "Ghee",
	    "tel": "Tel", "oil": "Tel",
	    "doodh": "Doodh", "dudh": "Doodh", "milk": "Doodh",
	    "ande": "Anday", "andy": "Anday", "egg": "Anday", "eggs": "Anday",
	    "murgi": "Murghi", "murgh": "Murghi", "chicken": "Murghi",
	    "goosht": "Gosht", "gosth": "Gosht", "meat": "Gosht",
	    "qeema": "Qeema", "keema": "Qeema", "kema": "Qeema",
	    "namk": "Namak", "nmk": "Namak",
	    "pyaz": "Piyaz", "piaz": "Piyaz", "onion": "Piyaz",
	    "tmatar": "Tamatar", "tomato": "Tamatar",
	    "alu": "Aloo", "aaloo": "Aloo", "potato": "Aloo",
	    "hldi": "Haldi", "turmeric": "Haldi",
	    "mirchi": "Mirch", "mrch": "Mirch",
	    "chai": "Chai", "chay": "Chai", "tea": "Chai",
	    "rotti": "Roti", "ruti": "Roti",
	    "nan": "Naan",
	    "chips": "Chips", "chps": "Chips",
	    "smaosa": "Samosa", "smosa": "Samosa",
	    "paratha": "Paratha", "pratha": "Paratha",
	    "biryni": "Biryani", "bryani": "Biryani",
	    "sabon": "Sabun", "soap": "Sabun",
	    "pepsi": "Pepsi", "ppsi": "Pepsi",
	    "cola": "Coca Cola", "coke": "Coca Cola",
	    "juice": "Juice", "juce": "Juice",
	    "noodls": "Noodles", "noodlez": "Noodles",
	    "biscut": "Biscuit", "biskit": "Biscuit",
	    "bred": "Bread", "braed": "Bread",
	    "suger": "Sugar", "sugr": "Sugar",
	    "flor": "Flour", "flwr": "Flour",
	}

	# ── Transaction Type Detection ────────────────────────────────────────────────
	# Cue words (Urdu script and romanized) per transaction type. They are used
	# as case-insensitive substrings by detect_transaction_type().
	TRANSACTION_KEYWORDS: dict[str, list[str]] = {
	    "udhaar": [
	        "ادھار",
	        "اُدھار",
	        "udhaar",
	        "udhar",
	        "udhr",
	        "credit",
	        "قرض",
	        "قرضہ",
	        "بعد میں",
	        "baad mein",
	        "ابھی نہیں",
	        "khata",
	        "کھاتا",
	        "کھاتے",
	    ],
	    # wasooli = collection/recovery → maps to 'debit' in scan.tsx (line 388)
	    "wasooli": [
	        "واصولی",
	        "وصولی",
	        "wasooli",
	        "wasoli",
	        "wasool",
	        "recovery",
	        "collection",
	        "وصول",
	    ],
	    "cash": [
	        "نقد",
	        "نقدی",
	        "cash",
	        "paid",
	        "pesa",
	        "پیسے",
	        "ادا",
	        "رقم",
	        "jama",
	        "جمع",
	    ],
	    "return": [
	        "واپسی",
	        "واپس",
	        "return",
	        "refund",
	        "wapsi",
	        "wapis",
	    ],
	}

	# ── Unit Normalization ────────────────────────────────────────────────────────
	# Lower-cased unit spelling -> canonical unit code. normalize_unit() falls
	# back to "pc" for anything not listed here.
	UNIT_MAP: dict[str, str] = {
	    # weight (kilograms)
	    "kilogram": "kg",
	    "kilograms": "kg",
	    "kilo": "kg",
	    "kg": "kg",
	    # weight (grams)
	    "gram": "g",
	    "grams": "g",
	    "gm": "g",
	    "g": "g",
	    # volume
	    "liter": "liter",
	    "litre": "liter",
	    "liters": "liter",
	    "l": "liter",
	    # dozens
	    "dozen": "dozen",
	    "dz": "dozen",
	    "doz": "dozen",
	    "درجن": "dozen",
	    # pieces
	    "piece": "pc",
	    "pieces": "pc",
	    "pc": "pc",
	    "pcs": "pc",
	    # Urdu-script units
	    "عدد": "pc",
	    "کلو": "kg",
	    "گرام": "g",
	    "لیٹر": "liter",
	    # packets
	    "packet": "pkt",
	    "pkt": "pkt",
	    "pack": "pkt",
	    "پیکٹ": "pkt",
	}

	"""Image Preprocessing Pipeline for handwritten parchi images.

	Stages: CLAHE → Denoise → Sharpen → Adaptive threshold.
	Also provides quality analysis and multi-variant generation.
	"""




	# Logger for the preprocessing stage.
	logger = logging.getLogger("parchi.preprocess")

	# ── Constants ─────────────────────────────────────────────────────────────────
	# CLAHE settings used by enhance().
	CLAHE_CLIP = 2.5
	CLAHE_TILE = (8, 8)
	# Filter strength passed as `h` to cv2.fastNlMeansDenoising.
	DENOISE_H = 10
	# 3x3 sharpening kernel; the weights sum to 1.
	SHARPEN_KERNEL = np.array(
	    [
	        [-1, -1, -1],
	        [-1, 9, -1],
	        [-1, -1, -1],
	    ]
	)
	# 1024px max — enough detail for handwriting, fewer VLM tokens
	TARGET_LONG_EDGE = 1024


	def analyze_quality(image: np.ndarray) -> Dict[str, float]:
	    """Return normalized quality metrics (0-1) for the input image.

	    Args:
	        image: A 3-D array is converted with ``COLOR_RGB2GRAY``; any other
	            rank is used as-is as the grayscale plane.

	    Returns:
	        A dict with ``sharpness``, ``brightness``, ``contrast``, ``noise``
	        (1.0 means no measurable noise) and ``overall`` (plain mean of the
	        four), each rounded to 3 decimals.
	    """
	    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) if len(image.shape) == 3 else image
	    sharpness = min(1.0, float(cv2.Laplacian(gray, cv2.CV_64F).var()) / 500)
	    brightness = float(np.mean(gray)) / 255
	    contrast = min(1.0, float(gray.std()) / 100)

	    # Subtract in floating point. On uint8 data `gray - blurred` wraps around
	    # (e.g. 3 - 5 == 254), which inflates the residual and makes clean images
	    # look noisy.
	    gray_f = gray.astype(np.float32)
	    residual = gray_f - cv2.GaussianBlur(gray_f, (5, 5), 0)
	    noise_raw = float(np.std(residual))
	    noise = max(0.0, 1.0 - noise_raw / 50)

	    overall = (sharpness + brightness + contrast + noise) / 4
	    return {
	        "sharpness": round(sharpness, 3),
	        "brightness": round(brightness, 3),
	        "contrast": round(contrast, 3),
	        "noise": round(noise, 3),
	        "overall": round(overall, 3),
	    }


	def resize_for_vlm(image: np.ndarray, max_edge: int = TARGET_LONG_EDGE) -> np.ndarray:
	    """Resize so the longest edge is at most ``max_edge`` (VLM memory savings).

	    Images that already fit are returned unchanged (same object). Larger
	    images are scaled down with ``INTER_AREA``, keeping the aspect ratio.
	    """
	    h, w = image.shape[:2]
	    longest = max(h, w)
	    if longest <= max_edge:
	        return image
	    scale = max_edge / longest
	    # Clamp to 1px: for very elongated images the short side can truncate
	    # to 0, and cv2.resize rejects a zero-sized target.
	    new_w = max(1, int(w * scale))
	    new_h = max(1, int(h * scale))
	    return cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)


	def auto_orient(image: np.ndarray) -> np.ndarray:
	    """Best-effort deskew based on Hough line angles.

	    The image is returned untouched when its shorter side is under 100px,
	    when no Hough lines are found, when no line angle falls strictly inside
	    (-45°, 45°), or when the median of those angles is 3° or less in
	    magnitude. Any exception is logged and the input is returned as-is.
	    """
	    try:
	        height, width = image.shape[:2]
	        if min(height, width) < 100:
	            return image

	        grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
	        # Line detection runs on a reduced copy.
	        preview_size = (max(400, width // 3), max(300, height // 3))
	        preview = cv2.resize(grayscale, preview_size)
	        edge_map = cv2.Canny(preview, 50, 150)
	        vote_threshold = int(len(preview) * 0.3)
	        detected = cv2.HoughLines(edge_map, 1, np.pi / 180, threshold=vote_threshold)
	        if detected is None:
	            return image

	        # Only the first 20 lines are considered; theta is shifted by 90°.
	        shifted = [entry[0][1] * 180 / np.pi - 90 for entry in detected[:20]]
	        usable = [deg for deg in shifted if -45 < deg < 45]
	        if not usable:
	            return image

	        skew = float(np.median(usable))
	        if abs(skew) > 3:
	            pivot = (width // 2, height // 2)
	            rotation = cv2.getRotationMatrix2D(pivot, skew, 1.0)
	            return cv2.warpAffine(
	                image,
	                rotation,
	                (width, height),
	                flags=cv2.INTER_CUBIC,
	                borderMode=cv2.BORDER_REPLICATE,
	            )
	        return image
	    except Exception as e:
	        logger.warning("auto_orient failed: %s", e)
	        return image


	def enhance(rgb: np.ndarray) -> np.ndarray:
	    """Produce a binarized copy of a parchi photo.

	    Stages, in order: deskew via ``auto_orient`` → grayscale → CLAHE →
	    non-local-means denoise → 3x3 sharpen → 2x2 morphological close →
	    Gaussian adaptive threshold. The single-channel result is expanded back
	    to three channels before it is returned.
	    """
	    working = cv2.cvtColor(auto_orient(rgb), cv2.COLOR_RGB2GRAY)

	    # Local contrast equalization evens out shadows.
	    equalizer = cv2.createCLAHE(clipLimit=CLAHE_CLIP, tileGridSize=CLAHE_TILE)
	    working = equalizer.apply(working)

	    # Denoise first, then sharpen what is left.
	    working = cv2.fastNlMeansDenoising(working, h=DENOISE_H)
	    working = cv2.filter2D(working, -1, SHARPEN_KERNEL)

	    # Closing reconnects pen strokes that were broken up.
	    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
	    working = cv2.morphologyEx(working, cv2.MORPH_CLOSE, closing_kernel)

	    black_and_white = cv2.adaptiveThreshold(
	        working,
	        255,
	        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	        cv2.THRESH_BINARY,
	        15,
	        5,
	    )
	    return cv2.cvtColor(black_and_white, cv2.COLOR_GRAY2RGB)


	def preprocess_for_vlm(rgb: np.ndarray) -> np.ndarray:
	    """Lightweight preprocessing for VLM input: resize, then equalize lightness.

	    The image stays in color. It is resized with ``resize_for_vlm``,
	    converted to LAB, CLAHE (clip 2.0, 8x8 tiles) is applied to the L channel
	    only, and the result is converted back to RGB. No binarization is done
	    because VLMs work better with natural images.
	    """
	    lab_image = cv2.cvtColor(resize_for_vlm(rgb), cv2.COLOR_RGB2LAB)
	    lightness, chroma_a, chroma_b = cv2.split(lab_image)
	    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
	    balanced = cv2.merge([equalizer.apply(lightness), chroma_a, chroma_b])
	    return cv2.cvtColor(balanced, cv2.COLOR_LAB2RGB)

	"""Brain Layer — Regex + Lexicon post-processor.

	Converts raw VLM text output into structured JSON:
	{ customer_name, items: [{name, quantity, price}], total, total_amount, transaction_type, mismatch }

	No LLM needed — deterministic, 0 RAM overhead, < 1ms latency.
	"""





	logger = logging.getLogger("parchi.brain")

	# ── Pre-compiled regex patterns ───────────────────────────────────────────────
	# Matches lines like: "Atta 2kg 200", "چینی 1 150", "Daal x3 - 450"
	RE_ITEM_LINE = re.compile(
	r"(?P<name>[A-Za-z\u0600-\u06FF\u0750-\u077F\s\-\.]+?)" # item name (Latin or Urdu)
	r"\s[xX×\-]?\s"
	r"(?P<qty>\d+(?:\.\d+)?)\s*" # quantity
	r"(?P<unit>kg\|g\|gram\|liter\|litre\|pkt\|pc\|dozen\|dz\|کلو\|گرام\|لیٹر\|عدد\|درجن)?\s*"
	r"[\-–—:=]?\s*"
	r"(?:Rs\.?\s\|PKR\s\|₨\s*)?" # optional currency
	r"(?P<price>\d+(?:\.\d+)?)", # price
	re.IGNORECASE \| re.UNICODE,
	)

	# Simpler fallback: just "name price" on a line
	RE_SIMPLE_LINE = re.compile(
	r"^(?P<name>[A-Za-z\u0600-\u06FF\u0750-\u077F\s\-\.]{2,}?)\s+"
	r"(?:Rs\.?\s\|PKR\s\|₨\s*)?"
	r"(?P<price>\d+(?:\.\d{1,2})?)$",
	re.IGNORECASE \| re.UNICODE \| re.MULTILINE,
	)

	RE_TOTAL = re.compile(
	r"(?:total\|ٹوٹل\|کل\|جمع\|مجموعی\|grand\s*total\|net\|amount)"
	r"\s[:=\-–—]?\s(?:Rs\.?\s\|PKR\s\|₨\s*)?"
	r"(\d+(?:\.\d+)?)",
	re.IGNORECASE \| re.UNICODE,
	)

	RE_NAME = re.compile(
	r"(?:name\|نام\|customer\|گاہک\|صارف)\s[:=\-–—]?\s(.+)",
	re.IGNORECASE \| re.UNICODE,
	)


	def transliterate_urdu(text: str, mapping: Optional[Dict[str, str]] = None) -> str:
	    """Replace Urdu words with their English transliterations.

	    Args:
	        text: Raw text that may contain Urdu grocery terms.
	        mapping: Optional Urdu -> English table; defaults to
	            ``URDU_TO_ENGLISH``.

	    Returns:
	        ``text`` with every occurrence of a mapped Urdu term substituted.
	    """
	    table = URDU_TO_ENGLISH if mapping is None else mapping
	    # Longest keys first: otherwise "مرچ" is rewritten before "کالی مرچ" gets
	    # a chance, and the phrase can never match. sorted() is stable, so keys
	    # of equal length keep their table order.
	    for urdu in sorted(table, key=len, reverse=True):
	        text = text.replace(urdu, table[urdu])
	    return text


	def _repair_got_ocr_fragments(raw: str) -> str:
	"""
	GOT-OCR produces fragmented words (e.g. 'Che eni' for 'Cheeni', 'b v 0 ger' for 'Burger').
	This repair stage:
	1. Strips noise tokens (single chars, '0' mixed with text)
	2. Collapses consecutive short tokens into a single word and fuzzy-matches them
	3. Returns a cleaner string suitable for the brain parser
	"""
	lines = []
	for line in raw.split("\n"):
	tokens = line.split()
	if not tokens:
	continue
	repaired_tokens = []
	i = 0
	while i < len(tokens):
	tok = tokens[i]
	# Skip pure noise: single letter that is not a unit, or '0' between words
	if len(tok) == 1 and not tok.isdigit() and tok.lower() not in ("v", "l", "g"):
	i += 1
	continue
	# Try merging consecutive short non-numeric tokens into one word
	if len(tok) <= 3 and not re.match(r"^\d", tok):
	merged = tok
	j = i + 1
	while j < len(tokens) and len(tokens[j]) <= 4 and not re.match(r"^\d", tokens[j]):
	merged += tokens[j]
	j += 1
	# Check if merged form fuzzy-matches the lexicon
	candidate = fuzz_process.extractOne(
	merged.lower(), GROCERY_LEXICON, scorer=fuzz.WRatio, score_cutoff=55
	)
	if candidate:
	repaired_tokens.append(candidate[0])
	i = j
	continue
	repaired_tokens.append(tok)
	i += 1
	if repaired_tokens:
	lines.append(" ".join(repaired_tokens))
	return "\n".join(lines)


	def correct_item_name(raw: str, aggressive: bool = False) -> str:
	    """Auto-correct OCR garbage using misspelling map + fuzzy lexicon match.

	    Args:
	        raw: Item name as read by OCR.
	        aggressive: When True (used for GOT-OCR fallback) the fuzzy
	            threshold is lowered from 70 to 55 to catch highly fragmented
	            output.

	    Returns:
	        The canonical name from ``COMMON_MISSPELLINGS`` or
	        ``GROCERY_LEXICON`` when one is found, otherwise the stripped input
	        in title case. Blank input yields an empty string.
	    """
	    cleaned = raw.strip().lower()
	    if not cleaned:
	        # Nothing to look up; skip the fuzzy search entirely.
	        return ""
	    threshold = 55 if aggressive else 70

	    # Step 1: Direct misspelling lookup
	    known = COMMON_MISSPELLINGS.get(cleaned)
	    if known is not None:
	        return known

	    # Step 2: Fuzzy match against canonical lexicon.
	    # The query is lower-case while the lexicon is Title Case. Without a
	    # processor, rapidfuzz compares case-sensitively, so every capital
	    # letter counts as a mismatch and near-misses drop below the cutoff.
	    match = fuzz_process.extractOne(
	        cleaned,
	        GROCERY_LEXICON,
	        scorer=fuzz.WRatio,
	        processor=str.lower,
	        score_cutoff=threshold,
	    )
	    if match:
	        return match[0]

	    # Step 3: Return title-cased original
	    return raw.strip().title()


	def normalize_unit(raw: str) -> str:
	    """Map a unit spelling to its canonical code, defaulting to ``"pc"``.

	    The lookup key is ``raw`` lower-cased and stripped of surrounding
	    whitespace.
	    """
	    lookup_key = raw.lower().strip()
	    if lookup_key in UNIT_MAP:
	        return UNIT_MAP[lookup_key]
	    return "pc"


	def detect_transaction_type(text: str) -> str:
	    """Pick the transaction type whose cue words occur most in ``text``.

	    Each keyword of ``TRANSACTION_KEYWORDS`` counts once if it appears as a
	    case-insensitive substring. The type with the highest count wins; on a
	    tie the type listed first in the table is returned. With no hit at all
	    the result is ``"unknown"``.
	    """
	    haystack = text.lower()
	    tallies = {
	        label: sum(cue.lower() in haystack for cue in cues)
	        for label, cues in TRANSACTION_KEYWORDS.items()
	    }
	    winner = max(tallies, key=tallies.get, default=None)
	    if winner is None or tallies[winner] == 0:
	        return "unknown"
	    return winner


	# Known grocery/item words that should NOT be treated as customer names
	# Lower-cased union of lexicon entries, known misspellings and receipt
	# keywords. (Set union is a bare `|`; a backslash before it is a syntax error.)
	_ITEM_WORDS_LOWER = (
	    {w.lower() for w in GROCERY_LEXICON}
	    | {w.lower() for w in COMMON_MISSPELLINGS}
	    | {
	        "total", "udhaar", "wasooli", "cash", "rs", "pkr", "amount",
	        "date", "bill", "invoice", "receipt", "parchi",
	    }
	)


	def extract_customer_name(text: str) -> Optional[str]:
	    """
	    Extract customer name from raw text.

	    Strategy (in priority order):
	    1. Explicit 'name:' / 'customer:' label
	    2. Capitalized proper name at the very START of text, cut off at the
	       first known item/keyword (so "Umar\\nCheeni 2kg" yields "Umar")
	    3. First pure-text line in the top 20% of the receipt (at least the
	       first 3 lines) that is not itself an item/keyword

	    Returns None when no strategy produces a candidate.
	    """
	    # Strategy 1: explicit label
	    m = RE_NAME.search(text)
	    if m:
	        name = m.group(1).strip()
	        # Drop trailing digits/separators captured after the name.
	        name = re.sub(r"[\d\-\u2013\u2014:=]+$", "", name).strip()
	        if 2 <= len(name) <= 50:
	            return name

	    # Strategy 2: capitalized proper name at the START of text
	    # Matches e.g. "Umar", "Umar Khan", "Muhammad Ali" before the first digit/item
	    start_match = re.match(
	        r"^([A-Z][a-z]{1,20}(?:\s+[A-Z][a-z]{1,20}){0,2})",
	        text.strip(),
	    )
	    if start_match:
	        words = start_match.group(1).split()
	        name_words: List[str] = []
	        for idx, word in enumerate(words):
	            # Stop at the first position where a known item/keyword begins.
	            # Multi-word entries such as "Kali Mirch" are checked too.
	            # Checking only the whole phrase let "Umar Cheeni" or
	            # "Total Amount" through as a customer name.
	            starts_item = any(
	                " ".join(words[idx:end]).lower() in _ITEM_WORDS_LOWER
	                for end in range(idx + 1, len(words) + 1)
	            )
	            if starts_item:
	                break
	            name_words.append(word)
	        if name_words:
	            return " ".join(name_words)

	    # Strategy 3: first pure-text line in top 20% of multiline text
	    lines = text.strip().split("\n")
	    top_lines = lines[:max(3, len(lines) // 5)]
	    skip = {"total", "\u0679\u0648\u0679\u0644", "\u06a9\u0644", "\u062c\u0645\u0639",
	            "date", "\u062a\u0627\u0631\u06cc\u062e", "rs", "pkr"}
	    for line in top_lines:
	        line = line.strip()
	        if not line or len(line) < 2:
	            continue
	        lowered = line.lower()
	        if any(kw in lowered for kw in skip):
	            continue
	        if lowered in _ITEM_WORDS_LOWER:  # e.g. a bare "udhaar" or "Cheeni" line
	            continue
	        if re.match(r"^\d+[\.\-/]\d+", line):  # date-like
	            continue
	        if not re.search(r"\d", line):  # pure text line
	            return line[:50]
	    return None


	def extract_total(text: str) -> Optional[float]:
	    """Return the amount following the first total keyword, or None.

	    The keyword/amount pattern is ``RE_TOTAL``; only its first match in
	    ``text`` is considered.
	    """
	    found = RE_TOTAL.search(text)
	    if found is None:
	        return None
	    try:
	        amount = float(found.group(1))
	    except ValueError:
	        return None
	    return amount


	def parse_items(text: str) -> List[Dict[str, Any]]:
	    """
	    Pull line items out of raw OCR text.

	    Pass 1 uses ``RE_ITEM_LINE`` (name + quantity + price). Pass 2 runs only
	    when pass 1 produced nothing: it uses ``RE_SIMPLE_LINE`` (name + price)
	    with quantity 1.0 and unit "pc". Prices outside 1..50000 and quantities
	    outside (0, 1000] are discarded.

	    Entries are de-duplicated on (corrected name, price), so the same item at
	    two different prices (e.g. 'milk-3 1200' and 'milk-2 500') is kept twice.
	    """
	    collected: List[Dict[str, Any]] = []
	    fingerprints: set = set()

	    def _remember(label: str, count: float, amount: float, unit_code: str) -> None:
	        # Add the entry unless the same (name, price) pair was already seen.
	        fingerprint = f"{label.lower()}:{amount}"
	        if fingerprint in fingerprints:
	            return
	        fingerprints.add(fingerprint)
	        collected.append(
	            {"name": label, "quantity": count, "price": amount, "unit": unit_code}
	        )

	    # Pass 1: full pattern
	    for hit in RE_ITEM_LINE.finditer(text):
	        label = correct_item_name(hit.group("name"))
	        count = float(hit.group("qty"))
	        amount = float(hit.group("price"))
	        unit_code = normalize_unit(hit.group("unit") or "pc")
	        if 1 <= amount <= 50000 and 0 < count <= 1000:
	            _remember(label, count, amount, unit_code)

	    if collected:
	        return collected

	    # Pass 2: name + price fallback
	    for hit in RE_SIMPLE_LINE.finditer(text):
	        label = correct_item_name(hit.group("name"))
	        amount = float(hit.group("price"))
	        if 1 <= amount <= 50000:
	            _remember(label, 1.0, amount, "pc")

	    return collected


	def validate_math(items: List[Dict[str, Any]], extracted_total: Optional[float]) -> bool:
	    """
	    Report whether the item prices disagree with the stated total.

	    On a Pakistani parchi the number on an item line is the LINE TOTAL
	    ('2.5kg Cheeni 200' means Rs 200 for the lot, not per kg), so the
	    expected total is the plain sum of item prices — quantities are ignored.

	    Returns True on a mismatch, i.e. when the gap exceeds the larger of Rs 2
	    and 5% of the stated total. Returns False when there are no items or no
	    (non-zero) total to compare against.
	    """
	    if not items or not extracted_total:
	        return False
	    line_total_sum = 0
	    for entry in items:
	        line_total_sum += entry["price"]
	    allowed_gap = max(2.0, extracted_total * 0.05)
	    gap = abs(line_total_sum - extracted_total)
	    return gap > allowed_gap


	def process_raw_text(raw_text: str) -> Dict[str, Any]:
	    """
	    Turn raw VLM text into the structured parchi payload.

	    The output keys follow SmartParchiBackendItem in scan.tsx: each item
	    carries name / quantity (NOT qty) / price, and the total is sent twice,
	    as ``total_amount`` (preferred by scan.tsx) and ``total`` (fallback).
	    """
	    # Everything downstream works on the transliterated text.
	    english_text = transliterate_urdu(raw_text)

	    customer = extract_customer_name(english_text)
	    line_items = parse_items(english_text)
	    grand_total = extract_total(english_text)
	    tx_kind = detect_transaction_type(english_text)

	    # No stated total: fall back to the sum of line totals.
	    if grand_total is None and line_items:
	        grand_total = sum(entry["price"] for entry in line_items)

	    has_mismatch = validate_math(line_items, grand_total)
	    reported_total = grand_total or 0.0

	    # Only name/quantity/price are exposed; 'quantity' matches
	    # SmartParchiBackendItem.quantity in scan.tsx rowsFromBackendItems().
	    public_items = []
	    for entry in line_items:
	        public_items.append(
	            {"name": entry["name"], "quantity": entry["quantity"], "price": entry["price"]}
	        )

	    return {
	        "customer_name": customer,
	        "items": public_items,
	        "total": reported_total,  # legacy fallback
	        "total_amount": reported_total,  # preferred by scan.tsx line 370
	        "transaction_type": tx_kind,
	        "mismatch": has_mismatch,
	    }


	def try_parse_json_response(text: str) -> Optional[Dict[str, Any]]:
	    """
	    If VLM returned JSON directly (Gemini / OpenRouter / Qaari with JSON prompt),
	    parse and normalize it into our standard output schema.

	    Returns None if no ``{...}`` span is present, if the span is not valid
	    JSON, if it has neither an "items" nor a "total" key, or if its values
	    cannot be normalized.
	    """
	    import json

	    # Greedy match from the first '{' to the last '}' tolerates surrounding
	    # prose or markdown fences.
	    json_match = re.search(r"\{[\s\S]*\}", text)
	    if not json_match:
	        return None
	    try:
	        data = json.loads(json_match.group())
	    except json.JSONDecodeError:
	        return None
	    if not isinstance(data, dict) or not ("items" in data or "total" in data):
	        return None
	    try:
	        return _normalize_api_result(data)
	    except (TypeError, ValueError) as exc:
	        # Well-formed JSON with unusable values (e.g. a non-numeric price)
	        # is treated like any other unparseable response, not as a crash.
	        logger.warning("Discarding VLM JSON that could not be normalized: %s", exc)
	        return None


	def _coerce_float(value: Any, default: float = 0.0) -> float:
	    """Best-effort conversion of a model-produced JSON value to float."""
	    if isinstance(value, (int, float)):
	        return float(value)
	    if isinstance(value, str):
	        # Accept decorated amounts such as "Rs 1,200" or "2.5kg".
	        number = re.search(r"-?\d+(?:\.\d+)?", value.replace(",", ""))
	        if number:
	            return float(number.group())
	    return default


	def _normalize_api_result(data: Dict[str, Any]) -> Dict[str, Any]:
	    """
	    Normalize a raw dict from Gemini/OpenRouter/Qaari JSON into our
	    standard schema matching SmartParchiBackendItem in scan.tsx.

	    Items that are not dicts or have no name are dropped. Quantity defaults
	    to 1.0 and price to 0.0; numeric fields that arrive as decorated strings
	    are parsed leniently and never raise. A missing or zero total is
	    replaced by the sum of item prices.
	    """
	    # --- Normalize items ---
	    raw_items = data.get("items", []) or []
	    items: List[Dict[str, Any]] = []
	    for it in raw_items:
	        if not isinstance(it, dict):
	            continue
	        name = it.get("name") or it.get("item") or ""
	        if not name:
	            continue
	        name = correct_item_name(str(name))
	        qty = _coerce_float(it.get("quantity") or it.get("qty"), 1.0)
	        price = _coerce_float(it.get("price") or it.get("total_price"), 0.0)
	        items.append({"name": name, "quantity": qty, "price": price})

	    # --- Normalize totals ---
	    total_val = _coerce_float(data.get("total") or data.get("total_amount"), 0.0)
	    if total_val == 0.0 and items:
	        total_val = sum(it["price"] for it in items)

	    # --- Normalize transaction type ---
	    tx = str(data.get("transaction_type") or "unknown").lower()
	    if tx not in ("udhaar", "wasooli", "cash", "return", "unknown"):
	        tx = detect_transaction_type(tx)  # map free-form wording via keyword cues

	    # --- Normalize customer name ---
	    cname = data.get("customer_name") or None
	    if isinstance(cname, str) and not cname.strip():
	        cname = None

	    return {
	        "customer_name": cname,
	        "items": items,
	        "total": total_val,
	        "total_amount": total_val,
	        "transaction_type": tx,
	        "mismatch": validate_math(items, total_val),
	    }

	"""SmartOCR Engine — Lazy-loading VLM manager.

	Primary: Qaari-0.1-Urdu-OCR-VL-2B (Qwen2-VL fine-tuned for Urdu Nastaliq)
	Fallback: GOT-OCR 2.0 (580MB layout specialist, loaded only on primary failure)

	Memory strategy:
	- Models loaded lazily on first request (not at startup).
	- Only ONE model in RAM at a time.
	- gc.collect() after every inference pass.
	- Memory guard: abort if RSS > VLM_MEMORY_LIMIT_MB.
	"""




	logger = logging.getLogger("parchi.ocr_engine")

	# ── Config from environment ───────────────────────────────────────────────────
	# Qaari is a PEFT LoRA adapter; base model is required to load it
	BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "Qwen/Qwen2-VL-2B-Instruct")
	PRIMARY_MODEL_ID = os.getenv("PRIMARY_MODEL_ID", "oddadmix/Qaari-0.1-Urdu-OCR-VL-2B-Instruct")
	FALLBACK_MODEL_ID = os.getenv("FALLBACK_MODEL_ID", "stepfun-ai/GOT-OCR-2.0-hf")
	# Any casing of 0/false/no disables the fallback engine.
	ENABLE_FALLBACK = os.getenv("ENABLE_FALLBACK", "1").strip().lower() not in ("0", "false", "no")
	VLM_MEMORY_LIMIT_MB = float(os.getenv("VLM_MEMORY_LIMIT_MB", "12000"))
	# CRITICAL: HF Space may have VLM_TIMEOUT_SECONDS=75 as env var — set it to 300 in Space settings.
	# 60 BPE tokens ≈ 240 chars — enough for any grocery receipt; keeps CPU inference under 2 min.
	VLM_MAX_TOKENS = int(os.getenv("VLM_MAX_NEW_TOKENS", "60"))
	VLM_TIMEOUT = float(os.getenv("VLM_TIMEOUT_SECONDS", "300"))  # override in HF Space env to 300

	# ── Cloud API Keys (Engine 1 & 2 — fast path) ─────────────────────────────────
	# SECURITY: credentials come from the environment / Space secrets only.
	# Source files are publicly readable, so a literal key used as the default
	# is a leaked key. Any key that was ever committed must be revoked and
	# rotated. An empty value simply disables that engine.
	# Engine 1: Gemini 2.5 Flash — 2-3s, free 250-1000 req/day
	GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
	GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")  # confirmed 200 OK from Oracle server
	# CRITICAL: Google API uses colon (:) not slash (/) before the method name
	GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/{}:generateContent"

	# Engine 2: OpenRouter free VLM cascade (tried in order; stop at first success)
	# Exact slugs verified via GET /api/v1/models on 2026-05-09
	OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
	OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
	OPENROUTER_MODELS = [
	    "baidu/qianfan-ocr-fast:free",  # OCR-specialized fastest
	    "google/gemma-4-26b-a4b-it:free",  # Gemma 4 26B A4B-IT
	    "google/gemma-4-31b-it:free",  # Gemma 4 31B-IT
	    "nvidia/nemotron-nano-12b-v2-vl:free",  # NVIDIA Nemotron 12B VL
	    "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free",  # NVIDIA Nemotron Omni
	]


	def _rss_mb() -> float:
	"""Current process RSS in MB."""
	try:
	import psutil
	return psutil.Process().memory_info().rss / 1024 / 1024
	except Exception:
	return 0.0


	def _free_mem():
	"""Aggressively release memory."""
	gc.collect()
	try:
	import torch
	torch.cuda.empty_cache()
	except Exception:
	pass


	class SmartOCR:
	"""Manages VLM lifecycle: lazy load → inference → cleanup."""

	def __init__(self):
	self._primary_model = None
	self._primary_processor = None
	self._fallback_model = None
	self._fallback_processor = None
	self._lock = threading.Lock()
	self._primary_loaded = False
	self._fallback_loaded = False
	self._primary_failed = False

	# ── Lazy Loaders ──────────────────────────────────────────────────────────

	def _load_primary(self):
	"""Load Qaari-0.1 as a PEFT LoRA adapter on top of Qwen2-VL-2B-Instruct."""
	if self._primary_loaded:
	return
	with self._lock:
	if self._primary_loaded:
	return
	logger.info("Loading base model: %s ...", BASE_MODEL_ID)
	logger.info("Applying PEFT adapter: %s ...", PRIMARY_MODEL_ID)
	t0 = time.time()
	try:
	import torch
	from peft import PeftModel
	from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

	# Step 1: Load Qwen2-VL-2B-Instruct base in fp32 for CPU
	base_model = Qwen2VLForConditionalGeneration.from_pretrained(
	BASE_MODEL_ID,
	torch_dtype=torch.float32,
	device_map="cpu",
	low_cpu_mem_usage=True,
	)

	# Step 2: Merge the Qaari LoRA adapter onto the base
	self._primary_model = PeftModel.from_pretrained(base_model, PRIMARY_MODEL_ID)
	self._primary_model.eval()

	# Processor comes from the base model (Qaari has no separate processor)
	self._primary_processor = AutoProcessor.from_pretrained(BASE_MODEL_ID)
	self._primary_loaded = True
	logger.info(
	"Primary model (base+adapter) loaded in %.1fs \| RSS=%.0f MB",
	time.time() - t0, _rss_mb(),
	)
	except Exception as e:
	logger.error("Primary model load FAILED: %s", e)
	self._primary_failed = True
	_free_mem()

	def _load_fallback(self):
	"""Load GOT-OCR 2.0 — only called if primary fails."""
	if self._fallback_loaded:
	return
	with self._lock:
	if self._fallback_loaded:
	return
	# Unload primary to free RAM
	self._unload_primary()
	logger.info("Loading fallback model: %s ...", FALLBACK_MODEL_ID)
	t0 = time.time()
	try:
	import torch
	from transformers import AutoModelForImageTextToText, AutoProcessor

	self._fallback_model = AutoModelForImageTextToText.from_pretrained(
	FALLBACK_MODEL_ID,
	torch_dtype=torch.float32,
	device_map="cpu",
	low_cpu_mem_usage=True,
	trust_remote_code=True,
	)
	self._fallback_model.eval()
	self._fallback_processor = AutoProcessor.from_pretrained(
	FALLBACK_MODEL_ID, trust_remote_code=True
	)
	self._fallback_loaded = True
	logger.info(
	"Fallback model loaded in %.1fs \| RSS=%.0f MB",
	time.time() - t0, _rss_mb(),
	)
	except Exception as e:
	logger.error("Fallback model load FAILED: %s", e)
	_free_mem()

	def _unload_primary(self):
	    """Drop the primary model and processor, then reclaim memory.

	    Clears ``_primary_loaded`` and logs the RSS measured afterwards.
	    """
	    self._primary_model = None
	    self._primary_processor = None
	    self._primary_loaded = False
	    _free_mem()
	    # Plain "|" separator: "\|" is an invalid escape sequence and would
	    # print a stray backslash.
	    logger.info("Primary model unloaded | RSS=%.0f MB", _rss_mb())

	def _unload_fallback(self):
	    """Drop the fallback model and processor, then reclaim memory.

	    Clears ``_fallback_loaded`` and logs the RSS measured afterwards.
	    """
	    self._fallback_model = None
	    self._fallback_processor = None
	    self._fallback_loaded = False
	    _free_mem()
	    # Plain "|" separator: "\|" is an invalid escape sequence and would
	    # print a stray backslash.
	    logger.info("Fallback model unloaded | RSS=%.0f MB", _rss_mb())

	# ── Memory Guard ──────────────────────────────────────────────────────────

	def _check_memory(self) -> bool:
	    """Tell whether process RSS is within ``VLM_MEMORY_LIMIT_MB``.

	    Returns True when it is safe to proceed. When the limit is exceeded a
	    warning is logged and False is returned.
	    """
	    current_mb = _rss_mb()
	    over_budget = current_mb > VLM_MEMORY_LIMIT_MB
	    if over_budget:
	        logger.warning(
	            "RSS %.0f MB exceeds limit %.0f MB — aborting", current_mb, VLM_MEMORY_LIMIT_MB
	        )
	    return not over_budget

	# ── Inference: Primary (Qaari) ────────────────────────────────────────────

	def _infer_qaari(self, pil_image: Image.Image) -> Optional[str]:
	    """Run Qaari-0.1 inference on a PIL image.

	    Returns:
	        The decoded text, or None when the model is not loaded, the memory
	        guard trips, generation exceeds ``VLM_TIMEOUT`` seconds, or any
	        error occurs (an error also sets ``_primary_failed``).
	    """
	    pool = None
	    try:
	        import concurrent.futures
	        import torch
	        from qwen_vl_utils import process_vision_info

	        self._load_primary()
	        if not self._primary_loaded or not self._check_memory():
	            return None

	        # Plain-text prompt — Qaari (2B) is an OCR model, NOT a JSON generator.
	        # Asking for JSON produces malformed/truncated output.
	        # Gemini/OpenRouter handle JSON; Qaari outputs clean plain text.
	        prompt = (
	            "You are a Pakistani grocery receipt (parchi) OCR reader. "
	            "Read this handwritten receipt and output ALL text clearly:\n"
	            "Line 1: customer name (if visible at top)\n"
	            "Line 2: transaction type (udhaar / wasooli / cash)\n"
	            "Lines 3+: each item as: name quantity unit price\n"
	            "Last line: Total amount\n"
	            "Output plain text only. Do not explain."
	        )

	        messages = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "image", "image": pil_image},
	                    {"type": "text", "text": prompt},
	                ],
	            }
	        ]

	        text_input = self._primary_processor.apply_chat_template(
	            messages, tokenize=False, add_generation_prompt=True
	        )
	        image_inputs, video_inputs = process_vision_info(messages)
	        inputs = self._primary_processor(
	            text=[text_input],
	            images=image_inputs,
	            videos=video_inputs,
	            padding=True,
	            return_tensors="pt",
	        )
	        # CPU -- no .to("cuda")

	        def _generate():
	            with torch.no_grad():
	                return self._primary_model.generate(
	                    **inputs,
	                    max_new_tokens=VLM_MAX_TOKENS,
	                    do_sample=False,
	                    use_cache=True,  # KV-cache: mandatory for fast CPU decoding
	                    repetition_penalty=1.2,  # Prevents looping; triggers early EOS
	                )

	        # Enforce the timeout on generate(). The executor is NOT used as a
	        # context manager: leaving a `with` block calls shutdown(wait=True),
	        # which blocks until generate() finishes and cancels out the timeout.
	        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
	        future = pool.submit(_generate)
	        try:
	            output_ids = future.result(timeout=VLM_TIMEOUT)
	        except concurrent.futures.TimeoutError:
	            logger.warning(
	                "Qaari generate() timed out after %.0fs -- no result, falling back",
	                VLM_TIMEOUT,
	            )
	            # A running generate() cannot be cancelled; it finishes in the
	            # background. Return None to trigger fallback.
	            return None

	        # Trim input tokens from output
	        trimmed = [
	            out[len(inp):] for inp, out in zip(inputs.input_ids, output_ids)
	        ]
	        result = self._primary_processor.batch_decode(
	            trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
	        )[0]

	        logger.info("Qaari output (%d chars): %.100s...", len(result), result)
	        return result

	    except Exception as e:
	        logger.error("Qaari inference failed: %s", e)
	        self._primary_failed = True
	        return None
	    finally:
	        if pool is not None:
	            # Never wait here; on timeout the worker may still be running.
	            pool.shutdown(wait=False)
	        _free_mem()

	# ── Inference: Fallback (GOT-OCR) ─────────────────────────────────────────

	def _infer_got_ocr(self, pil_image: Image.Image) -> Optional[str]:
	    """Run the GOT-OCR 2.0 fallback on a PIL image.

	    Returns:
	        The decoded text, or None when the fallback is disabled
	        (``ENABLE_FALLBACK``), the model is not loaded, the memory guard
	        trips, or any error occurs.
	    """
	    if not ENABLE_FALLBACK:
	        return None
	    try:
	        import torch

	        self._load_fallback()
	        if not self._fallback_loaded or not self._check_memory():
	            return None

	        inputs = self._fallback_processor(pil_image, return_tensors="pt")

	        with torch.no_grad():
	            output_ids = self._fallback_model.generate(
	                **inputs,
	                do_sample=False,
	                tokenizer=self._fallback_processor.tokenizer,
	                # End-of-turn marker, written without backslashes: the
	                # escaped form "<\|im_end\|>" never appears in model output,
	                # so generation would only stop at max_new_tokens.
	                stop_strings="<|im_end|>",
	                max_new_tokens=VLM_MAX_TOKENS,
	            )

	        # Decode only the newly generated tokens (skip the prompt part).
	        result = self._fallback_processor.decode(
	            output_ids[0, inputs["input_ids"].shape[1]:],
	            skip_special_tokens=True,
	        )

	        logger.info("GOT-OCR output (%d chars): %.100s...", len(result), result)
	        return result

	    except Exception as e:
	        logger.error("GOT-OCR inference failed: %s", e)
	        return None
	    finally:
	        _free_mem()

	# ── Public API ────────────────────────────────────────────────────────────

	# ── Engine 1: Gemini 2.5 Flash API ────────────────────────────────────────

	def _infer_gemini_api(self, image_bytes: bytes) -> Optional[Dict[str, Any]]:
	    """Call the Gemini REST API (Engine 1) on a receipt image.

	    Args:
	        image_bytes: Encoded image file contents, sent inline as base64.

	    Returns:
	        The output of ``_normalize_api_result`` for the parsed JSON object,
	        or None when no API key is configured, the API answers with a
	        non-200 status, the reply contains no usable JSON object, or any
	        exception is raised along the way.
	    """
	    if not GEMINI_API_KEY:
	        return None
	    import base64
	    import json as _json
	    import httpx

	    CLOUD_PROMPT = (
	        "Pakistani grocery receipt OCR. Rules:\n"
	        "1. Name at top with no price beside it = customer_name\n"
	        "2. udhaar/\u0627\u062f\u06be\u0627\u0631=credit, wasooli/\u0648\u0627\u0635\u0648\u0644\u06cc=payment, cash/\u0646\u0642\u062f=cash\n"
	        "3. Each item line: [Name] [qty][unit] [LINE_TOTAL]. "
	        "Last number = LINE TOTAL price (not unit price). "
	        "cheeni-2.5 200 -> Cheeni qty=2.5 price=200\n"
	        "4. Number after Total/\u06a9\u0644/\u062c\u0645\u0639 = grand total\n"
	        "5. Fix OCR errors (g->9, I->1 if context requires)\n"
	        "Return ONLY valid JSON (no markdown):\n"
	        '{"customer_name":null,"transaction_type":"unknown",'
	        '"items":[{"name":"Atta","quantity":2.0,"unit":"kg","price":200.0}],'
	        '"total":200.0}'
	    )

	    try:
	        # Sniff the container format from its magic bytes instead of always
	        # claiming JPEG; anything unrecognised keeps the old JPEG label.
	        if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
	            mime = "image/png"
	        elif image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
	            mime = "image/webp"
	        else:
	            mime = "image/jpeg"

	        encoded = base64.b64encode(image_bytes).decode()
	        payload = {
	            "contents": [{
	                "parts": [
	                    {"text": CLOUD_PROMPT},
	                    {"inline_data": {"mime_type": mime, "data": encoded}},
	                ]
	            }],
	            "generationConfig": {
	                "temperature": 0.1,
	                "maxOutputTokens": 1024,  # 512 caused truncation on complex parchis
	                # responseMimeType removed -- not supported on all model versions
	            },
	        }
	        url = GEMINI_URL.format(GEMINI_MODEL)
	        with httpx.Client(timeout=30.0) as client:
	            resp = client.post(
	                url,
	                json=payload,
	                headers={
	                    "Content-Type": "application/json",
	                    # Send the key as a header, not as ?key=... in the URL:
	                    # request URLs end up in httpx's request log and in
	                    # exception messages, which would leak the secret.
	                    "x-goog-api-key": GEMINI_API_KEY,
	                },
	            )
	        if resp.status_code == 429:
	            logger.warning("Gemini rate-limited (429) -- trying OpenRouter")
	            return None
	        if resp.status_code != 200:
	            logger.warning("Gemini API error %d: %.200s", resp.status_code, resp.text)
	            return None

	        raw = resp.json()["candidates"][0]["content"]["parts"][0]["text"]

	        # Robust JSON extraction. Candidates, in order: the reply as-is, the
	        # reply with markdown fences stripped, the outermost {...} span.
	        unfenced = re.sub(r"```(?:json)?\s*", "", raw).strip().rstrip("`").strip()
	        candidates = [raw.strip(), unfenced]
	        braces = re.search(r"\{[\s\S]*\}", raw)
	        if braces:
	            candidates.append(braces.group())

	        data = None
	        for text in candidates:
	            try:
	                parsed = _json.loads(text)
	            except _json.JSONDecodeError:
	                continue
	            # Only a non-empty JSON object is usable downstream (.get below).
	            if isinstance(parsed, dict) and parsed:
	                data = parsed
	                break
	        if data is None:
	            logger.warning("Gemini non-JSON (truncated?): %.120s", raw)
	            return None

	        logger.info("Engine 1 (Gemini) success: %d items", len(data.get("items", [])))
	        return _normalize_api_result(data)

	    except Exception as e:
	        # Best-effort engine: any failure just hands over to the next one.
	        logger.warning("Gemini inference failed: %s", e)
	        return None

	# ── Engine 2: OpenRouter Free VLM Cascade ─────────────────────────────────

	def _infer_openrouter_api(self, image_bytes: bytes) -> Optional[Dict[str, Any]]:
	    """Try each model in ``OPENROUTER_MODELS`` in order (Engine 2).

	    Args:
	        image_bytes: Encoded image file contents, sent as a base64 data URL.

	    Returns:
	        The output of ``_normalize_api_result`` for the first model whose
	        reply contains a JSON object, or None when no API key is configured
	        or every model fails.
	    """
	    if not OPENROUTER_API_KEY:
	        return None
	    import base64
	    import json as _json
	    import httpx

	    CLOUD_PROMPT = (
	        "Pakistani grocery receipt. Extract: customer name (top, no price beside), "
	        "transaction type (udhaar=credit/wasooli=payment/cash), items with qty+unit+price "
	        "(last number on each line is LINE TOTAL, not unit price), and grand total. "
	        "Return ONLY valid JSON: "
	        '{"customer_name":null,"transaction_type":"unknown",'
	        '"items":[{"name":"","quantity":1.0,"unit":"pc","price":0.0}],"total":0.0}'
	    )

	    # Sniff the container format from its magic bytes instead of always
	    # claiming JPEG; anything unrecognised keeps the old JPEG label.
	    if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
	        mime = "image/png"
	    elif image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
	        mime = "image/webp"
	    else:
	        mime = "image/jpeg"

	    encoded = base64.b64encode(image_bytes).decode()
	    img_url = f"data:{mime};base64,{encoded}"
	    headers = {
	        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
	        "Content-Type": "application/json",
	        "HTTP-Referer": "https://bazaar-bridge.app",
	        "X-Title": "Bazaar Bridge OCR",
	    }

	    # One client for the whole cascade, so attempts share a connection pool
	    # instead of opening a fresh one per model.
	    try:
	        client = httpx.Client(timeout=25.0)
	    except Exception as e:
	        logger.warning("OpenRouter client setup failed: %s", e)
	        return None

	    with client:
	        for model in OPENROUTER_MODELS:
	            try:
	                payload = {
	                    "model": model,
	                    "messages": [{
	                        "role": "user",
	                        "content": [
	                            {"type": "image_url",
	                             "image_url": {"url": img_url}},
	                            {"type": "text", "text": CLOUD_PROMPT},
	                        ],
	                    }],
	                    # Same budget as the Gemini engine, where 512 was noted
	                    # to truncate complex parchis.
	                    "max_tokens": 1024,
	                    "temperature": 0.1,
	                }
	                resp = client.post(OPENROUTER_URL, json=payload, headers=headers)

	                if resp.status_code == 429:
	                    logger.warning("OpenRouter model %s rate-limited — trying next", model)
	                    continue
	                if resp.status_code not in (200, 201):
	                    logger.warning("OpenRouter %s returned %d — trying next", model, resp.status_code)
	                    continue

	                content = resp.json()["choices"][0]["message"]["content"]
	                # Extract JSON from content (may have markdown code fences)
	                json_match = re.search(r"\{[\s\S]*\}", content)
	                if not json_match:
	                    logger.warning("OpenRouter %s returned no JSON — trying next", model)
	                    continue
	                data = _json.loads(json_match.group())
	                if not isinstance(data, dict):
	                    logger.warning("OpenRouter %s returned non-object JSON — trying next", model)
	                    continue
	                logger.info("Engine 2 (OpenRouter/%s) success: %d items",
	                            model, len(data.get("items", [])))
	                return _normalize_api_result(data)

	            except Exception as e:
	                # Best-effort cascade: a failing model never aborts the loop.
	                logger.warning("OpenRouter model %s failed: %s — trying next", model, e)
	                continue

	    logger.warning("All OpenRouter models failed — falling back to local Qaari")
	    return None

	def extract_structured(self, image_bytes: bytes) -> Optional[Dict[str, Any]]:
	    """
	    Run the cloud engines in priority order and return the first result.

	    Gemini (Engine 1) is asked first; OpenRouter (Engine 2) is only called
	    when Gemini yields nothing. The winning dict is tagged under "_engine".
	    Returns None if both fail (caller should use local VLM fallback).
	    """
	    engines = (
	        ("gemini", self._infer_gemini_api),
	        ("openrouter", self._infer_openrouter_api),
	    )
	    for label, infer in engines:
	        outcome = infer(image_bytes)
	        if outcome:
	            outcome["_engine"] = label
	            return outcome
	    return None

	def extract_text(self, pil_image: Image.Image) -> str:
	    """
	    Local VLM extraction (Engine 3 — emergency fallback only).

	    Qaari is asked first unless it is flagged as failed; GOT-OCR is used
	    when Qaari gives nothing usable. Text counts as usable when it has
	    more than 5 characters after stripping. Returns plain text for the
	    brain layer, or "" when neither model produced usable text.
	    """
	    def _usable(text: Optional[str]) -> bool:
	        return bool(text) and len(text.strip()) > 5

	    # Qaari primary
	    primary_text = None if self._primary_failed else self._infer_qaari(pil_image)
	    if _usable(primary_text):
	        return primary_text

	    # GOT-OCR secondary
	    logger.info("Primary returned nothing useful -- trying GOT-OCR fallback")
	    fallback_text = self._infer_got_ocr(pil_image)
	    if not _usable(fallback_text):
	        return ""

	    repaired = _repair_got_ocr_fragments(fallback_text)
	    logger.info("GOT-OCR repaired (%d->%d chars): %.100s...",
	                len(fallback_text), len(repaired), repaired)
	    return repaired

	def health_check(self) -> dict:
	    """Summarise the state of all three engines for the /health endpoint."""
	    def _key_state(api_key) -> str:
	        # A cloud engine is reported "ready" as soon as its key is set.
	        return "ready" if api_key else "disabled"

	    cloud = {
	        "engine1_gemini": _key_state(GEMINI_API_KEY),
	        "engine2_openrouter": _key_state(OPENROUTER_API_KEY),
	        "engine2_models": OPENROUTER_MODELS,
	    }
	    local = {
	        "engine3_primary": PRIMARY_MODEL_ID,
	        "primary_loaded": self._primary_loaded,
	        "primary_failed": self._primary_failed,
	        "engine3_fallback": FALLBACK_MODEL_ID,
	        "fallback_enabled": ENABLE_FALLBACK,
	        "fallback_loaded": self._fallback_loaded,
	    }
	    memory = {
	        "rss_mb": round(_rss_mb(), 1),
	        "memory_limit_mb": VLM_MEMORY_LIMIT_MB,
	    }
	    return {**cloud, **local, **memory}

	"""Smart Parchi OCR v7 — FastAPI Orchestrator.

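	Local Hybrid Architecture:
	Cloud → Gemini (Engine 1) / OpenRouter VLM cascade (Engine 2), tried first when API keys are set
	Vision → Qaari-0.1 (primary) / GOT-OCR 2.0 (fallback), local Engine 3
	Brain → Regex + Pakistani Lexicon (deterministic JSON formatting)

	Endpoints:
	POST /process-parchi → queues OCR for a receipt image and returns a job_id (or a cached result)
	GET /result/{job_id} → job status, plus the structured JSON once the job is done
	GET /health → engine status + memory usage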
	"""





	# ── Suppress noisy warnings ──────────────────────────────────────────────────
	# NOTE: this silences every Python warning process-wide, not only library noise.
	warnings.filterwarnings("ignore")
	# setdefault: values already present in the environment win.
	os.environ.setdefault("OMP_NUM_THREADS", "1")
	os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

	# ── Logging ───────────────────────────────────────────────────────────────────
	logging.basicConfig(
	    level=logging.INFO,
	    # Plain "|" separators: "\|" is an invalid escape sequence that would
	    # print stray backslashes in every log line.
	    format="%(asctime)s | %(name)s | %(levelname)s | %(message)s",
	)
	logger = logging.getLogger("parchi.app")

	# ── Constants ─────────────────────────────────────────────────────────────────
	MAX_IMAGE_SIZE_MB = 10
	CONCURRENCY_LIMIT = 1  # 1 worker only — Qwen2-VL-2B fp32 uses ~9GB on CPU
	CACHE_SIZE = 50  # max entries in result_cache; oldest entry is evicted first
	CACHE_TTL = 3600  # 1 hour

	# ── Globals ───────────────────────────────────────────────────────────────────
	ocr_engine = SmartOCR()
	# NOTE(review): no handler visible in this part of the file acquires this
	# semaphore — confirm whether it is still used anywhere.
	semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)
	result_cache: Dict[str, dict] = {}  # hash → {result, ts}

	# ── Async Job Store (bypasses HF platform HTTP timeout) ──────────────────────────
	# Jobs older than JOB_TTL seconds are pruned when a new job is submitted
	JOB_TTL = 3600  # 1 hour
	job_store: Dict[str, dict] = {}  # job_id → {status, result, ts, error}

	# ── FastAPI App ───────────────────────────────────────────────────────────────
	from contextlib import asynccontextmanager

	@asynccontextmanager
	async def lifespan(app: FastAPI):
	    """Pre-warm the VLM at container startup so first request isn't penalized.

	    Startup: loads the primary model in the default executor so the event
	    loop is not blocked; a failed pre-warm is logged and startup continues.
	    Shutdown: unloads both models, even if the application exits with an
	    error or the first unload raises.
	    """
	    logger.info("=== Startup: pre-warming primary OCR model ===")
	    # We are inside a coroutine, so the running loop is the one to use.
	    loop = asyncio.get_running_loop()
	    try:
	        await loop.run_in_executor(None, ocr_engine._load_primary)
	        logger.info("=== Startup: model ready | RSS=%.0f MB ===", _rss_mb())
	    except Exception as e:
	        logger.error("=== Startup: model pre-warm FAILED: %s ===", e)
	    try:
	        yield  # App runs here
	    finally:
	        logger.info("=== Shutdown: releasing model ===")
	        try:
	            ocr_engine._unload_primary()
	        finally:
	            ocr_engine._unload_fallback()

	app = FastAPI(
	    title="Smart Parchi OCR v7",
	    description=(
	        "Hybrid OCR for Pakistani handwritten receipts. "
	        "Cloud VLMs (Gemini, OpenRouter) are tried first when API keys are set; "
	        "local Qaari-0.1 (Urdu Nastaliq) + GOT-OCR 2.0 is the fallback."
	    ),
	    # Kept in step with the version reported by GET /health.
	    version="7.1.0",
	    lifespan=lifespan,
	)

	# NOTE(review): wildcard origins combined with allow_credentials=True lets
	# any site make credentialed requests — confirm this is intended.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=True,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)


	# ── Cache Helpers ─────────────────────────────────────────────────────────────

	def _image_hash(data: bytes) -> str:
	return hashlib.sha256(data).hexdigest()[:16]


	def _cache_get(h: str) -> dict \| None:
	entry = result_cache.get(h)
	if entry and (time.time() - entry["ts"]) < CACHE_TTL:
	return entry["result"]
	if entry:
	del result_cache[h]
	return None


	def _cache_put(h: str, result: dict):
	if len(result_cache) >= CACHE_SIZE:
	oldest_key = min(result_cache, key=lambda k: result_cache[k]["ts"])
	del result_cache[oldest_key]
	result_cache[h] = {"result": result, "ts": time.time()}


	# ── Image Loading ─────────────────────────────────────────────────────────────

	def load_image(raw_bytes: bytes) -> np.ndarray:
	    """Decode image bytes into an RGB numpy array.

	    Raises:
	        ValueError: If the payload is larger than MAX_IMAGE_SIZE_MB.
	    """
	    size_in_mb = len(raw_bytes) / (1024 * 1024)
	    if size_in_mb > MAX_IMAGE_SIZE_MB:
	        message = f"Image too large: {size_in_mb:.1f} MB (max {MAX_IMAGE_SIZE_MB})"
	        raise ValueError(message)

	    buffer = io.BytesIO(raw_bytes)
	    decoded = Image.open(buffer)
	    rgb_image = decoded.convert("RGB")
	    return np.array(rgb_image)


	# ── Core Processing ───────────────────────────────────────────────────────────

	def process_image(rgb: np.ndarray, raw_bytes: bytes = None) -> Dict[str, Any]:
	    """Run the whole OCR pipeline for one image and return structured JSON.

	    Order of work: quality analysis, then the cloud engines (only when
	    raw_bytes is given and at least one API key is set), then the local
	    VLM with one retry on an enhanced image, then the brain layer. The
	    returned dict is enriched with timing, raw text, quality and engine info.
	    """
	    started = time.time()

	    def _elapsed_ms() -> int:
	        return int((time.time() - started) * 1000)

	    # Step 1: Image quality analysis
	    quality = analyze_quality(rgb)
	    logger.info("Image quality: %s", quality)

	    # Step 2: Try fast cloud APIs (Engine 1: Gemini, Engine 2: OpenRouter)
	    cloud_configured = bool(GEMINI_API_KEY or OPENROUTER_API_KEY)
	    if raw_bytes and cloud_configured:
	        cloud = ocr_engine.extract_structured(raw_bytes)
	        if cloud:
	            engine_name = cloud.pop("_engine", "cloud")
	            cloud.update({
	                "processing_time_ms": _elapsed_ms(),
	                "raw_text": f"[{engine_name.upper()} API]",
	                "image_quality": quality,
	                "engine": {**ocr_engine.health_check(),
	                           "active_engine": engine_name},
	            })
	            logger.info("Cloud engine (%s) returned result in %.1fs",
	                        engine_name, time.time() - started)
	            return cloud

	    # Step 3 + 4: Preprocess, then local VLM inference (Engine 3: Qaari + GOT-OCR)
	    logger.info("Cloud engines unavailable — falling back to local Qaari (Engine 3)")
	    vlm_input = Image.fromarray(preprocess_for_vlm(rgb))
	    raw_text = ocr_engine.extract_text(vlm_input)
	    logger.info("VLM raw output (%d chars)", len(raw_text))

	    if not raw_text.strip():
	        # Nothing readable: one more attempt on the enhanced image.
	        logger.info("Retrying with binarized image...")
	        retry_input = Image.fromarray(enhance(rgb))
	        raw_text = ocr_engine.extract_text(retry_input)

	    # Step 5: Brain — JSON parse first, regex parsing when that yields nothing
	    parsed = try_parse_json_response(raw_text) or process_raw_text(raw_text)

	    # Step 6: Enrich with metadata
	    parsed.update({
	        "processing_time_ms": _elapsed_ms(),
	        "raw_text": raw_text[:500],
	        "image_quality": quality,
	        "engine": {**ocr_engine.health_check(), "active_engine": "qaari_local"},
	    })
	    return parsed


	# ── Background OCR Worker (Async Job Queue) ───────────────────────────────────

	# Caps how many OCR jobs run at once. Jobs are submitted to a thread pool,
	# so CONCURRENCY_LIMIT has to be enforced with a thread-level semaphore.
	_OCR_JOB_SLOTS = threading.BoundedSemaphore(CONCURRENCY_LIMIT)


	def _run_ocr_job(job_id: str, raw_bytes: bytes, img_hash: str):
	    """Blocking OCR function executed in a thread-pool worker.

	    Waits for a free OCR slot, runs the pipeline, and records the outcome
	    on the job's entry in job_store ("done" with a result, or "error" with
	    a message). Successful results are also written to the result cache.

	    Args:
	        job_id: Key of the job's entry in job_store.
	        raw_bytes: Encoded image file contents.
	        img_hash: Cache key for the image.
	    """
	    job = job_store.get(job_id)
	    if job is None:
	        # The entry was pruned before the worker got to it; nobody can
	        # poll for the result any more.
	        logger.warning("[%s] Job no longer in job store -- skipping", job_id)
	        return
	    try:
	        # The job stays "queued" until a slot is free.
	        with _OCR_JOB_SLOTS:
	            job["status"] = "processing"
	            rgb = load_image(raw_bytes)
	            # Pass raw_bytes so process_image can try Gemini/OpenRouter first
	            result = process_image(rgb, raw_bytes=raw_bytes)
	        result["job_id"] = job_id
	        result["success"] = bool(result.get("items"))
	        result["cached"] = False
	        if result["success"]:
	            # Cache a copy, and only for successful extractions: a failed
	            # run should not be replayed to later uploads of the same
	            # image, and the job's own result must not share a dict with
	            # the cache.
	            _cache_put(img_hash, dict(result))
	        job.update({"status": "done", "result": result})
	        elapsed = time.time() - job["ts"]
	        logger.info("[%s] Job completed in %.1fs", job_id, elapsed)
	    except Exception as e:
	        logger.exception("[%s] Job failed", job_id)
	        job.update({"status": "error", "error": str(e)})
	    finally:
	        gc.collect()


	# ── Endpoints ─────────────────────────────────────────────────────────────────

	@app.post("/process-parchi")
	async def process_parchi(image: UploadFile = File(...)):
	    """
	    Submit a parchi image for OCR processing.

	    Returns immediately with a job_id (typically <1s).
	    Poll GET /result/{job_id} every 10s until status == 'done'.
	    If the same image was processed recently, the cached result is
	    returned directly with status 'done' and no job is created.

	    This async pattern is required because CPU inference takes 2-4 minutes,
	    which exceeds the HF platform HTTP timeout (~60s).

	    Raises:
	        HTTPException: 400 when the upload cannot be read, is too large,
	            or is not a decodable image.
	    """
	    job_id = str(uuid.uuid4())[:12]
	    logger.info("[%s] Received: %s (%s)", job_id, image.filename, image.content_type)

	    try:
	        raw_bytes = await image.read()
	    except Exception as e:
	        raise HTTPException(400, f"Failed to read file: {e}") from e

	    # Cache hit -- return result immediately without spawning a job
	    img_hash = _image_hash(raw_bytes)
	    cached = _cache_get(img_hash)
	    if cached:
	        logger.info("[%s] Cache hit -- returning immediately", job_id)
	        # Build a new dict for this response: writing into the cached dict
	        # would also rewrite the stored entry for every later reader.
	        return JSONResponse({
	            **cached,
	            "job_id": job_id,
	            "cached": True,
	            "status": "done",
	        })

	    # Validate image before queuing
	    try:
	        load_image(raw_bytes)
	    except ValueError as e:
	        raise HTTPException(400, str(e)) from e
	    except Exception as e:
	        raise HTTPException(400, f"Invalid image: {e}") from e

	    # Register job and prune stale ones
	    now = time.time()
	    job_store[job_id] = {"status": "queued", "ts": now, "result": None, "error": None}
	    stale = [k for k, v in job_store.items() if now - v["ts"] > JOB_TTL]
	    for k in stale:
	        del job_store[k]

	    # Submit to thread pool (non-blocking -- returns immediately)
	    loop = asyncio.get_running_loop()
	    loop.run_in_executor(None, _run_ocr_job, job_id, raw_bytes, img_hash)

	    logger.info("[%s] Job queued -- returning job_id immediately", job_id)
	    return JSONResponse({
	        "job_id": job_id,
	        "status": "queued",
	        "poll_url": f"/result/{job_id}",
	        "message": "Image accepted. Poll /result/{job_id} every 10s until status=done.",
	    })


	@app.get("/result/{job_id}")
	async def get_result(job_id: str):
	    """
	    Poll for OCR job result.

	    Returns:
	        status=queued|processing : not ready yet, poll again in 10s
	        status=done : the structured parchi fields are merged into the response
	        status=error : error field contains the failure message

	    Raises:
	        HTTPException: 404 when the job id is not in the job store.
	    """
	    job = job_store.get(job_id)
	    if not job:
	        raise HTTPException(404, f"Job '{job_id}' not found. It may have expired (TTL=1h).")

	    # Read the status once: a worker thread may update the job while this
	    # handler runs, and every branch below must agree on one value.
	    status = job["status"]
	    response: Dict[str, Any] = {"job_id": job_id, "status": status}
	    if status == "done":
	        response.update(job["result"] or {})
	        # The payload carries its own job_id (and may carry a status);
	        # the values for the job being polled always win.
	        response["job_id"] = job_id
	        response["status"] = status
	    elif status == "error":
	        response["error"] = job["error"]
	    else:
	        response["elapsed_seconds"] = int(time.time() - job["ts"])
	        response["message"] = "Job is processing. Poll again in 10 seconds."

	    return JSONResponse(response)


	@app.get("/health")
	async def health():
	    """Report service health: engine state, cache size and job counts."""
	    pending_states = ("queued", "processing")
	    active = len([job for job in job_store.values() if job["status"] in pending_states])
	    return dict(
	        status="healthy",
	        version="7.1.0",
	        architecture="Local Hybrid (Qaari + GOT-OCR) -- Async Job Queue",
	        engine=ocr_engine.health_check(),
	        cache_entries=len(result_cache),
	        active_jobs=active,
	        total_jobs=len(job_store),
	    )


	@app.get("/")
	async def root():
	    """Describe the service and point to its main routes."""
	    routes = dict(
	        docs="/docs",
	        health="/health",
	        submit="POST /process-parchi -> {job_id, status: queued}",
	        poll="GET /result/{job_id} -> {status, result (when done)}",
	    )
	    return {"service": "Smart Parchi OCR v7.1", **routes}