Spaces:

Um34ER
/

bazaar-bridge-ocr

Running

App Files Files Community

Um34ER committed 29 days ago

Commit

63ae52b

verified ·

1 Parent(s): a072884

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -498

app.py CHANGED Viewed

@@ -1,549 +1,158 @@
 """
-Smart Parchi OCR - Working Version for HF CPU Basic
-Tested with: Atta-2 kg 200, Bugger 2, Cheeni 21 kg, Total = 950 u dhara
 """
-from __future__ import annotations
-import hashlib
-import io
-import logging
-import re
-import time
-import threading
-from typing import List, Tuple, Optional, Dict, Any
 import cv2
 import numpy as np
 from PIL import Image
-from fastapi import FastAPI, File, UploadFile, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-import easyocr
-# ============================================================================
-# CONFIGURATION
-# ============================================================================
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Image settings
-MAX_SIZE = 1200
-MIN_CONFIDENCE = 0.05  # Very low for handwriting
-TEXT_THRESHOLD = 0.10
-LOW_TEXT = 0.15
-# Price validation
-MAX_PRICE = 50000
-MIN_PRICE = 1
-# Cache
-CACHE_TTL = 86400  # 24 hours
-# ============================================================================
-# DATA MODELS
-# ============================================================================
 class ExtractedItem(BaseModel):
     name: str
-    quantity: float = 1.0
     price: float
-    confidence: float = 0.0
-    low_confidence: bool = False
-    unit: str = "pc"
 class ProcessResponse(BaseModel):
-    request_id: str
     success: bool
-    customer_name: Optional[str] = None
-    items: List[ExtractedItem] = []
-    total: float = 0.0
-    mismatch: bool = False
-    transaction_type: str = "unknown"
-    processing_time_ms: float = 0.0
-    item_count: int = 0
-    error: Optional[str] = None
-# ============================================================================
-# SIMPLE ITEM CORRECTIONS
-# ============================================================================
-ITEM_CORRECTIONS = {
-    'atta': ['atta', 'aata', 'arta', 'ata', 'flour', 'aataa'],
-    'cheeni': ['cheeni', 'chini', 'cheeny', 'cheni', 'sugar', 'chinni'],
-    'burger': ['burger', 'buger', 'bubiger', 'buggar', 'burjer'],
-    'ghee': ['ghee', 'ghi', 'desi ghee'],
-    'doodh': ['doodh', 'dudh', 'milk'],
-    'chawal': ['chawal', 'rice', 'chawal rice'],
-    'daal': ['daal', 'dal', 'lentils'],
-    'namak': ['namak', 'salt'],
-    'mirch': ['mirch', 'chili'],
-    'sabun': ['sabun', 'soap'],
-}
-TRANSACTION_WORDS = ['udhaar', 'udhar', 'u dhara', 'wasooli', 'وصولی', 'ادھار']
-# ============================================================================
-# UTILITIES
-# ============================================================================
-def normalize_text(text: str) -> str:
     """Clean OCR text"""
-    if not text:
-        return ""
-    # Urdu to English digits
-    urdu_digits = '۰۱۲۳۴۵۶۷۸۹'
-    eng_digits = '0123456789'
-    for u, e in zip(urdu_digits, eng_digits):
-        text = text.replace(u, e)
-    # Fix common confusions
-    text = text.replace('O', '0').replace('o', '0')
-    text = text.replace('l', '1').replace('I', '1')
-    text = text.replace('S', '5').replace('s', '5')
-    text = text.replace('Z', '2').replace('z', '2')
-    # Remove special chars
-    text = re.sub(r'[^\w\sء-ي0-9]', ' ', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text.lower()
-def extract_numbers(text: str) -> List[float]:
-    """Extract numbers from text"""
-    numbers = []
-    for match in re.finditer(r'\b(\d+(?:\.\d+)?)\b', text):
-        try:
-            num = float(match.group(1))
-            if 0 < num < 100000:
-                numbers.append(num)
-        except:
-            pass
-    return numbers
-def correct_item_name(name: str) -> str:
-    """Correct common OCR errors"""
-    name_lower = name.lower().strip()
-    for correct, variants in ITEM_CORRECTIONS.items():
-        if name_lower in variants:
-            return correct
-        for var in variants:
-            if var in name_lower or name_lower in var:
-                if len(var) > 2 and len(name_lower) > 2:
-                    return correct
-    return name_lower
-# ============================================================================
-# OCR ENGINE
-# ============================================================================
-_reader = None
-_lock = threading.Lock()
-def get_reader():
-    global _reader
-    if _reader is None:
-        with _lock:
-            if _reader is None:
-                logger.info("Loading EasyOCR (Urdu+English)...")
-                _reader = easyocr.Reader(['ur', 'en'], gpu=False)
-                logger.info("Ready!")
-    return _reader
-def run_ocr(image: np.ndarray) -> List[Tuple[float, str, float]]:
-    """Run OCR and return tokens"""
-    reader = get_reader()
-    try:
-        results = reader.readtext(
-            image,
-            detail=1,
-            paragraph=False,
-            text_threshold=TEXT_THRESHOLD,
-            low_text=LOW_TEXT,
-            width_ths=0.5,
-            ycenter_ths=0.5
-        )
-        tokens = []
-        for bbox, text, conf in results:
-            if conf >= MIN_CONFIDENCE:
-                cleaned = normalize_text(text)
-                if cleaned and len(cleaned) > 1:
-                    y_center = (bbox[0][1] + bbox[2][1]) / 2
-                    tokens.append((y_center, cleaned, conf))
-        tokens.sort(key=lambda x: x[0])
-        return tokens
-    except Exception as e:
-        logger.error(f"OCR error: {e}")
-        return []
-# ============================================================================
-# IMAGE PREPROCESSING
-# ============================================================================
-def preprocess_image(rgb: np.ndarray) -> List[np.ndarray]:
-    """Generate preprocessing variants"""
-    variants = []
-    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
-    h, w = gray.shape
-    # Resize if needed
-    if max(h, w) > MAX_SIZE:
-        scale = MAX_SIZE / max(h, w)
-        gray = cv2.resize(gray, None, fx=scale, fy=scale)
-    # Variant 1: CLAHE
-    clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
-    enhanced = clahe.apply(gray)
-    variants.append(cv2.cvtColor(enhanced, cv2.COLOR_GRAY2RGB))
-    # Variant 2: Adaptive threshold
-    thresh = cv2.adaptiveThreshold(
-        enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-        cv2.THRESH_BINARY, 15, 5
-    )
-    variants.append(cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB))
-    # Variant 3: Inverted (for light text)
-    inverted = cv2.bitwise_not(thresh)
-    variants.append(cv2.cvtColor(inverted, cv2.COLOR_GRAY2RGB))
-    return variants
-# ============================================================================
-# PARSING
-# ============================================================================
-def group_into_lines(tokens: List[Tuple[float, str, float]]) -> List[str]:
-    """Group tokens into lines"""
-    if not tokens:
-        return []
-    lines = []
-    current = [tokens[0]]
-    for t in tokens[1:]:
-        if abs(t[0] - current[-1][0]) <= 25:
-            current.append(t)
-        else:
-            lines.append(' '.join(x[1] for x in current))
-            current = [t]
-    if current:
-        lines.append(' '.join(x[1] for x in current))
-    return lines
-def parse_items_and_total(lines: List[str]) -> Tuple[List[Dict], float]:
-    """Parse items and extract total"""
     items = []
-    numbers_in_lines = []
     for line in lines:
-        # Skip header/footer
-        line_lower = line.lower()
-        skip = ['date', 'time', 'total', 'udhaar', 'wasooli', 'cash', 'name', 'customer']
-        if any(k in line_lower for k in skip):
-            # Check for total in these lines
-            nums = extract_numbers(line)
-            if nums and 'total' in line_lower or 'udhaar' in line_lower:
-                numbers_in_lines.extend(nums)
             continue
-        # Check for patterns
-        # Pattern 1: "item - qty - price" or "item - price"
-        if '-' in line:
-            parts = line.split('-')
-            if len(parts) >= 2:
-                item_name = parts[0].strip()
-                rest = '-'.join(parts[1:])
-                nums = extract_numbers(rest)
-                if len(nums) >= 2:
-                    # item - qty - price
-                    qty = nums[0]
-                    price = nums[1]
-                elif len(nums) == 1:
-                    # item - price
-                    qty = 1.0
-                    price = nums[0]
-                else:
-                    continue
-                if price and MIN_PRICE <= price <= MAX_PRICE:
-                    items.append({
-                        'name': correct_item_name(item_name),
-                        'quantity': qty,
-                        'price': price,
-                        'confidence': 0.75
-                    })
-                    continue
-        # Pattern 2: "item qty price"
-        nums = extract_numbers(line)
-        if len(nums) >= 2:
-            # Find text part (remove numbers)
-            text_part = line
-            for n in nums:
-                text_part = text_part.replace(str(int(n)), '', 1)
-            text_part = re.sub(r'\d+', '', text_part)
-            text_part = re.sub(r'[^\w\sء-ي]', ' ', text_part).strip()
-            if text_part and len(text_part) > 1:
-                qty = nums[0] if nums[0] > 1 or len(nums) == 2 else 1.0
-                price = nums[-1]
-                if price and MIN_PRICE <= price <= MAX_PRICE:
-                    items.append({
-                        'name': correct_item_name(text_part),
-                        'quantity': qty,
-                        'price': price,
-                        'confidence': 0.70
-                    })
-        elif len(nums) == 1:
-            # Single number - might be total or price without quantity
-            text_part = re.sub(r'\d+', '', line)
-            text_part = re.sub(r'[^\w\sء-ي]', ' ', text_part).strip()
-            if text_part and len(text_part) > 2 and len(text_part) < 30:
-                # This is likely an item with implicit quantity=1
-                price = nums[0]
-                if price and MIN_PRICE <= price <= MAX_PRICE:
-                    items.append({
-                        'name': correct_item_name(text_part),
-                        'quantity': 1.0,
-                        'price': price,
-                        'confidence': 0.65
-                    })
-            else:
-                # This might be a total
-                numbers_in_lines.extend(nums)
-    # Determine total
-    total = 0.0
-    if numbers_in_lines:
-        # Take the largest number as total
-        total = max(numbers_in_lines)
-    # Also check for explicit total line
-    for line in lines[-3:]:
-        if 'total' in line.lower() or 'udhaar' in line.lower() or 'ٹوٹل' in line:
-            nums = extract_numbers(line)
-            if nums:
-                total = max(nums)
-                break
-    # Calculate items sum
-    items_sum = sum(i['price'] * i['quantity'] for i in items) if items else 0
-    # If no total found, use items sum
-    if total == 0 and items_sum > 0:
-        total = items_sum
-    # Check if mismatch
-    mismatch = abs(total - items_sum) > 5 if total > 0 and items_sum > 0 else False
-    return items, total, mismatch
-def extract_customer_name(lines: List[str]) -> Optional[str]:
-    """Extract customer name from top lines"""
-    for i, line in enumerate(lines[:4]):
-        cleaned = re.sub(r'[^\w\sء-ي]', ' ', line).strip()
-        # Must have no digits
-        if any(c.isdigit() for c in cleaned):
-            continue
-        # Must have reasonable length
-        if len(cleaned) < 3 or len(cleaned) > 35:
-            continue
-        cleaned_lower = cleaned.lower()
-        skip = ['date', 'time', 'total', 'udhaar', 'wasooli', 'cash', 'name', 'customer', 'shop']
-        if any(k in cleaned_lower for k in skip):
-            continue
-        # Remove extra spaces
-        cleaned = re.sub(r'\s+', ' ', cleaned).strip()
-        if cleaned:
-            words = [w.capitalize() if w[0].isascii() else w for w in cleaned.split()]
-            return ' '.join(words)
-    return None
-def detect_type(lines: List[str]) -> str:
-    """Detect transaction type"""
-    for line in lines[-3:]:
-        line_lower = line.lower()
-        if any(w in line_lower for w in ['udhaar', 'udhar', 'u dhara', 'ادھار']):
-            return 'udhaar'
-        if any(w in line_lower for w in ['wasooli', 'وصولی']):
-            return 'wasooli'
-    return 'unknown'
-# ============================================================================
-# CACHE
-# ============================================================================
-result_cache = {}
-def get_cache_key(data: bytes) -> str:
-    return hashlib.sha256(data).hexdigest()
-# ============================================================================
-# FASTAPI APP
-# ============================================================================
-app = FastAPI(title="Parchi OCR", version="7.0.0")
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-@app.on_event("startup")
-async def startup():
-    """Warm up OCR"""
-    logger.info("Starting Parchi OCR v7.0...")
-    threading.Thread(target=get_reader).start()
-@app.get("/health")
-async def health():
-    return {"status": "ok", "version": "7.0.0"}
-@app.post("/process-parchi", response_model=ProcessResponse)
 async def process_parchi(image: UploadFile = File(...)):
-    """Process a parchi image"""
-    if not image.content_type or not image.content_type.startswith("image/"):
-        raise HTTPException(400, "Must be an image")
     contents = await image.read()
-    request_id = hashlib.md5(contents).hexdigest()[:16]
-    # Check cache
-    if request_id in result_cache:
-        logger.info(f"[{request_id}] Cache hit")
-        return result_cache[request_id]
-    start_time = time.time()
-    try:
-        # Load image
-        pil_img = Image.open(io.BytesIO(contents)).convert('RGB')
-        rgb = np.array(pil_img)
-        # Preprocess
-        variants = preprocess_image(rgb)
-        # OCR on all variants
-        all_tokens = []
-        for variant in variants:
-            tokens = run_ocr(variant)
-            all_tokens.extend(tokens)
-        # Deduplicate
-        seen = set()
-        unique = []
-        for y, text, conf in all_tokens:
-            key = (text, int(y // 25))
-            if key not in seen:
-                seen.add(key)
-                unique.append((y, text, conf))
-        unique.sort(key=lambda x: x[0])
-        # Group into lines
-        lines = group_into_lines(unique)
-        if not lines:
-            result = ProcessResponse(
-                request_id=request_id,
-                success=False,
-                error="No text detected",
-                processing_time_ms=(time.time() - start_time) * 1000
-            )
-            result_cache[request_id] = result
-            return result
-        # Parse
-        customer_name = extract_customer_name(lines)
-        items, total, mismatch = parse_items_and_total(lines)
-        tx_type = detect_type(lines)
-        # Format items
-        extracted_items = []
-        for item in items:
-            extracted_items.append(ExtractedItem(
-                name=item['name'],
-                quantity=item['quantity'],
-                price=round(item['price'], 2),
-                confidence=item['confidence'],
-                low_confidence=item['confidence'] < 0.5,
-                unit='kg' if 'kg' in item['name'] else 'pc'
-            ))
-        processing_time = (time.time() - start_time) * 1000
-        result = ProcessResponse(
-            request_id=request_id,
-            success=True,
-            customer_name=customer_name,
-            items=extracted_items,
-            total=round(total, 2),
-            mismatch=mismatch,
-            transaction_type=tx_type,
-            processing_time_ms=round(processing_time, 1),
-            item_count=len(extracted_items)
-        )
-        # Cache
-        result_cache[request_id] = result
-        # Clean old cache
-        if len(result_cache) > 100:
-            oldest = min(result_cache.keys())
-            del result_cache[oldest]
-        logger.info(f"[{request_id}] Items: {len(extracted_items)}, Total: {total}, Time: {processing_time:.0f}ms")
-        return result
-    except Exception as e:
-        logger.error(f"[{request_id}] Error: {e}")
-        return ProcessResponse(
-            request_id=request_id,
-            success=False,
-            error=str(e),
-            processing_time_ms=(time.time() - start_time) * 1000
-        )
 if __name__ == "__main__":

 """
+Parchi OCR - PaddleOCR Version (Works for Handwritten Urdu)
 """
+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from paddleocr import PaddleOCR
 import cv2
 import numpy as np
 from PIL import Image
+import io
+import re
+import hashlib
+import time
+from typing import List, Dict, Any, Optional
 from pydantic import BaseModel
+import logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# ASGI application object; the @app.post route in this file registers on it.
+app = FastAPI(title="Parchi OCR Pro")
+# NOTE(review): wildcard origins combined with allow_credentials=True is
+# advised against by the FastAPI CORS documentation - confirm whether
+# credentialed cross-origin requests are needed; otherwise disable
+# credentials or list the allowed origins explicitly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Initialize PaddleOCR once
+# The instance is created at import time and reused by every request
+# (process_parchi calls ocr.ocr on it).
+# NOTE(review): lang='en' is passed here although the module docstring
+# advertises handwritten Urdu - confirm the intended recognition language.
+ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)
 class ExtractedItem(BaseModel):
+    # One parsed line item of a parchi; process_parchi builds these from the
+    # dicts returned by parse_items_and_total.
+    # Item text with digits and punctuation stripped.
     name: str
+    # First number found on the item line.
+    quantity: float
+    # Last number found on the item line.
     price: float
+    # Score attached by the parser (not an OCR engine score).
+    confidence: float
 class ProcessResponse(BaseModel):
+    # Payload returned by process_parchi for POST /process-parchi.
+    # The handler in this file always sets this to True.
     success: bool
+    # Parsed line items, in the order their lines were recognised.
+    items: List[ExtractedItem]
+    # Explicit total read from the parchi, or the sum of price * quantity
+    # over the items when no total was found.
+    total: float
+    # 'udhaar' when the recognised text contains "udhaar" or "udhar",
+    # otherwise 'unknown'.
+    transaction_type: str
+    # Wall-clock handler time in milliseconds.
+    processing_time_ms: float
+def clean_text(text: str) -> str:
     """Clean OCR text"""
+    # Remove special characters
+    # Anything that is neither a word character nor whitespace (punctuation,
+    # symbols) becomes a space so neighbouring words are not glued together.
+    text = re.sub(r'[^\w\s]', ' ', text)
+    # Collapse whitespace runs left by the substitution and trim the ends.
     text = re.sub(r'\s+', ' ', text).strip()
+    # Result is lower-cased for case-insensitive matching by callers.
     return text.lower()
+def fix_urdu_digits(text: str) -> str:
+    """Return ``text`` with the Urdu digits ۰-۹ rewritten as ASCII 0-9."""
+    # Single translation pass; characters outside the table are left as-is.
+    digit_table = str.maketrans('۰۱۲۳۴۵۶۷۸۹', '0123456789')
+    return text.translate(digit_table)
+def parse_items_and_total(lines: List[str]) -> tuple:
+    """Parse items and total from OCR lines.
+
+    Args:
+        lines: Recognised text lines, top to bottom.
+
+    Returns:
+        ``(items, total)``. ``items`` is a list of dicts with the keys
+        ``name``, ``quantity``, ``price`` and ``confidence``; ``total`` is the
+        number read from a "total"/"udhaar" line, else the first lone number
+        seen, else the sum of ``price * quantity`` over the items (0 when
+        nothing was found).
+    """
+    # Integer or decimal literal, so "950.50" stays one number instead of
+    # being split into 950 and 50.
+    number_re = re.compile(r'\d+(?:\.\d+)?')
     items = []
+    total = 0
     for line in lines:
+        # Fix digits
+        line = fix_urdu_digits(line)
+        numbers = [float(n) for n in number_re.findall(line)]
+        # Check for total: the last number on a "total"/"udhaar" line wins
+        # and overrides any earlier guess.
+        lowered = line.lower()
+        if 'total' in lowered or 'udhaar' in lowered:
+            if numbers:
+                total = numbers[-1]
             continue
+        if len(numbers) >= 2:
+            # "Item Qty Price": first number is the quantity, last the price.
+            qty, price = numbers[0], numbers[-1]
+            # Item name is text without numbers
+            name = clean_text(number_re.sub('', line))
+            if name and price:
+                items.append({
+                    'name': name[:20],
+                    'quantity': qty,
+                    'price': price,
+                    'confidence': 0.8,
+                })
+        elif len(numbers) == 1 and not total:
+            # Single number - kept as a candidate total until an explicit
+            # total line replaces it.
+            total = numbers[0]
+    # If no total found, calculate from items
+    if total == 0 and items:
+        total = sum(i['price'] * i['quantity'] for i in items)
+    return items, total
+@app.post("/process-parchi")
 async def process_parchi(image: UploadFile = File(...)):
+    """Process parchi image.
+
+    Decodes the upload, runs OCR on it, and parses the recognised lines into
+    items, a total and a transaction type.
+
+    Raises:
+        HTTPException: 400 when the upload cannot be decoded as an image.
+    """
+    start_time = time.time()
+    # Read image
     contents = await image.read()
+    try:
+        # Normalise to 3-channel RGB so palette, grayscale and RGBA uploads
+        # all reach the OCR engine with the same array layout.
+        img = np.array(Image.open(io.BytesIO(contents)).convert('RGB'))
+    except OSError as exc:
+        # Pillow reports undecodable or truncated data with OSError subclasses;
+        # answer with a client error instead of an unhandled 500.
+        raise HTTPException(status_code=400, detail="Invalid image file") from exc
+    # Run OCR
+    result = ocr.ocr(img, cls=True)
+    # Extract text lines
+    # presumably each entry is (box, (text, score)) - verify against the
+    # installed PaddleOCR version.
+    lines = []
+    if result and result[0]:
+        for entry in result[0]:
+            text = entry[1][0]
+            if text:
+                lines.append(text)
+    # Parse
+    items, total = parse_items_and_total(lines)
+    # Detect transaction type
+    full_text = ' '.join(lines).lower()
+    tx_type = 'udhaar' if 'udhaar' in full_text or 'udhar' in full_text else 'unknown'
+    processing_time = (time.time() - start_time) * 1000
+    return ProcessResponse(
+        success=True,
+        items=[ExtractedItem(**i) for i in items],
+        total=float(total),
+        transaction_type=tx_type,
+        processing_time_ms=processing_time
+    )
 if __name__ == "__main__":