Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| import easyocr | |
| import cv2 | |
| import numpy as np | |
| import re | |
| import os | |
| import requests | |
# Application object for this service.
app = FastAPI()

# =========================
# LOAD OCR MODEL
# =========================
# Created once at module import so every call to extract_text() reuses it.
reader = easyocr.Reader(["en"])
| # ========================= | |
| # REQUEST MODEL | |
| # ========================= | |
class ImageRequest(BaseModel):
    """Request body consumed by ``validate_document``."""

    # Address the image is downloaded from.
    image_url: str
    # Name of the document type the image is expected to show; it is
    # compared against the list of supported document types.
    document_type: str
| # ========================= | |
| # DOWNLOAD IMAGE | |
| # ========================= | |
def download_image(url):
    """Download the image at *url* and store it as ``temp.jpg``.

    Args:
        url: Address of the image to fetch.

    Returns:
        The path of the saved file (``"temp.jpg"``) on success, or ``None``
        when the request fails, the response status is not 200, or the file
        cannot be written.
    """
    # NOTE(review): the URL is fetched exactly as supplied. If it comes from
    # untrusted callers, consider restricting schemes/hosts to prevent SSRF.
    # The file name is fixed because the endpoint's cleanup removes
    # "temp.jpg" by name.
    image_path = "temp.jpg"

    try:
        response = requests.get(url, timeout=30)

        if response.status_code != 200:
            return None

        with open(image_path, "wb") as f:
            f.write(response.content)
    except (requests.RequestException, OSError):
        # Network and file-system failures mean "no image". Anything else
        # (KeyboardInterrupt, SystemExit, programming errors) propagates
        # instead of being silently swallowed by a bare ``except``.
        return None

    return image_path
| # ========================= | |
| # IMAGE QUALITY CHECKS | |
| # ========================= | |
def is_blurry(image, threshold=100):
    """Report whether *image* looks out of focus.

    The image is converted to grayscale and the variance of its Laplacian
    is compared with *threshold*.

    Args:
        image: Colour image array; it is converted with ``COLOR_BGR2GRAY``,
            so it is presumably in BGR channel order as produced by
            ``cv2.imread``.
        threshold: Variance below which the image counts as blurry.
            Defaults to 100, the value previously hard-coded.

    Returns:
        ``True`` when the Laplacian variance is below *threshold*.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    variance = cv2.Laplacian(gray, cv2.CV_64F).var()
    # bool() turns the NumPy comparison result into a plain Python bool.
    return bool(variance < threshold)
def is_dark(image, threshold=50):
    """Report whether *image* is too dark.

    Args:
        image: Image array whose mean over all elements is used as the
            brightness measure.
        threshold: Mean value below which the image counts as dark.
            Defaults to 50, the value previously hard-coded.

    Returns:
        ``True`` when the mean of *image* is below *threshold*.
    """
    brightness = np.mean(image)
    # bool() turns the NumPy comparison result into a plain Python bool.
    return bool(brightness < threshold)
| # ========================= | |
| # OCR TEXT EXTRACTION | |
| # ========================= | |
def extract_text(image_path):
    """Run OCR on the file at *image_path* and return its text in lowercase.

    The element at index 1 of every entry returned by ``reader.readtext``
    is taken as the recognised text (presumably entries are
    ``(box, text, confidence)`` - confirm against the EasyOCR docs). The
    pieces are joined with single spaces.
    """
    detections = reader.readtext(image_path)
    fragments = (entry[1] for entry in detections)
    joined = " ".join(fragments)
    return joined.lower()
| # ========================= | |
| # DOCUMENT VALIDATION | |
| # ========================= | |
# Keywords of at most this many characters are matched as whole words only.
# Short tokens such as "nin", "oyo" or "niger" otherwise match inside
# unrelated words ("morning", "toyota", "nigeria").
_WHOLE_WORD_MAX_LEN = 5

# Document type -> (points per matched keyword, keywords in scoring order).
_DOCUMENT_KEYWORDS = {
    "National ID (NIN)": (25, (
        "national identification number",
        "national identity",
        "nimc",
        "nin",
    )),
    "International Passport": (25, (
        "passport",
        "federal republic of nigeria",
        "nigeria passport",
        "international passport",
    )),
    "Driver License": (20, (
        "driver",
        "license",
        "drivers licence",
        "driver licence",
        "frsc",
    )),
    "Voter Card": (25, (
        "voter",
        "inec",
        "permanent voter",
        "polling unit",
    )),
    "Utility Bill": (15, (
        # General
        "electricity",
        "electric bill",
        "power bill",
        "meter number",
        "meter no",
        "token",
        "kwh",
        "prepaid",
        "postpaid",
        "energy charge",
        "tariff",
        # Nigerian DISCOs
        "ibedc",
        "ibadan electricity",
        "ikedc",
        "ikeja electric",
        "ekedc",
        "eko electric",
        "aedc",
        "abuja electricity",
        "eedc",
        "enugu electricity",
        "bedc",
        "benin electricity",
        "jed",
        "jos electricity",
        "kedco",
        "kano electricity",
        "kaedco",
        "kaduna electric",
        "phed",
        "port harcourt electricity",
        "yedc",
        "yola electricity",
    )),
    "Bank Statement": (15, (
        "account statement",
        "statement of account",
        "transaction",
        "balance",
        "account number",
        "credit",
        "debit",
        "withdrawal",
        "deposit",
        # Nigerian banks
        "access bank",
        "gtbank",
        "uba",
        "zenith bank",
        "first bank",
        "opay",
        "moniepoint",
        "kuda",
        "fcmb",
        "sterling bank",
        "wema bank",
        "providus",
        "fidelity bank",
        "union bank",
    )),
    "Tenancy Agreement": (20, (
        "tenancy agreement",
        "landlord",
        "tenant",
        "rent",
        "property",
        "lease agreement",
        "rental agreement",
    )),
    "Vehicle with Plate Number": (10, (
        "toyota",
        "honda",
        "lexus",
        "benz",
        "mercedes",
        "ford",
        "jeep",
        "hyundai",
        "kia",
        "nissan",
        "camry",
        "corolla",
        "rav4",
        "pilot",
        "highlander",
        "vehicle",
        "plate number",
    )),
}

# State names scored (5 points each) for "Vehicle with Plate Number".
_NIGERIAN_STATES = (
    "lagos",
    "abuja",
    "kano",
    "kaduna",
    "oyo",
    "ogun",
    "ondo",
    "osun",
    "kwara",
    "imo",
    "anambra",
    "enugu",
    "rivers",
    "delta",
    "edo",
    "cross river",
    "akwa ibom",
    "bayelsa",
    "plateau",
    "benue",
    "kogi",
    "ekiti",
    "niger",
    "zamfara",
    "sokoto",
    "katsina",
    "borno",
    "yobe",
    "adamawa",
    "taraba",
    "gombe",
    "bauchi",
    "jigawa",
    "nasarawa",
    "kebbi",
    "ebonyi",
)

# Plate-number shapes, searched in the upper-cased text (50 points each).
_PLATE_PATTERNS = (
    re.compile(r"[A-Z]{3}-?\d{3}[A-Z]{2}"),
    re.compile(r"[A-Z]{2}\d{3}[A-Z]{3}"),
    re.compile(r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"),
)


def _keyword_in_text(keyword, text):
    """Return True when *keyword* occurs in the lowercase, cleaned *text*."""
    if len(keyword) > _WHOLE_WORD_MAX_LEN:
        # Longer keywords keep plain substring matching, which tolerates OCR
        # output where neighbouring words have been glued together.
        return keyword in text
    # Short keywords must stand alone; an optional trailing "s" still
    # accepts simple plurals such as "voters" or "tokens".
    pattern = rf"(?<![a-z0-9]){re.escape(keyword)}s?(?![a-z0-9])"
    return re.search(pattern, text) is not None


def validate_document_type(
    text,
    document_type
):
    """Score how well OCR *text* matches the expected *document_type*.

    The text is lower-cased, every character other than ASCII letters,
    digits, whitespace and ``-`` is replaced by a space, and whitespace runs
    are collapsed so multi-word keywords still match after punctuation has
    been removed. Each matched keyword adds the points configured for the
    document type. For "Vehicle with Plate Number", state names (5 points
    each) and plate-number patterns (50 points each) are scored as well.

    Args:
        text: Text extracted from the image.
        document_type: One of the keys of ``_DOCUMENT_KEYWORDS``.

    Returns:
        ``None`` when nothing matched or *document_type* is unknown;
        otherwise a dict with ``document_type``, ``confidence`` (capped at
        99) and ``matched_keywords`` in the order they were checked.
    """
    entry = _DOCUMENT_KEYWORDS.get(document_type)
    if entry is None:
        return None
    points, keywords = entry

    cleaned_text = re.sub(r"[^a-z0-9\s-]", " ", text.lower())
    # Collapse whitespace: stripping punctuation can leave double spaces
    # that would otherwise break phrases such as "polling unit".
    cleaned_text = " ".join(cleaned_text.split())

    matched_keywords = [
        keyword for keyword in keywords
        if _keyword_in_text(keyword, cleaned_text)
    ]
    confidence = points * len(matched_keywords)

    if document_type == "Vehicle with Plate Number":
        matched_states = [
            state for state in _NIGERIAN_STATES
            if _keyword_in_text(state, cleaned_text)
        ]
        matched_keywords.extend(matched_states)
        confidence += 5 * len(matched_states)

        upper_text = cleaned_text.upper()
        for pattern in _PLATE_PATTERNS:
            plate_match = pattern.search(upper_text)
            if plate_match:
                matched_keywords.append(plate_match.group())
                confidence += 50

    if confidence <= 0:
        return None

    return {
        "document_type": document_type,
        "confidence": min(confidence, 99),
        "matched_keywords": matched_keywords
    }
| # ========================= | |
| # HOME ROUTE | |
| # ========================= | |
def home():
    """Return a status payload listing the supported document types.

    NOTE(review): no route decorator is visible on this function here;
    confirm how it is registered with the FastAPI app.
    """
    supported_documents = [
        "National ID (NIN)",
        "International Passport",
        "Driver License",
        "Voter Card",
        "Vehicle with Plate Number",
        "Utility Bill",
        "Bank Statement",
        "Tenancy Agreement",
    ]
    payload = {
        "success": True,
        "message": "Document Validation API Running",
        "supported_documents": supported_documents,
    }
    return payload
| # ========================= | |
| # VALIDATION ENDPOINT | |
| # ========================= | |
async def validate_document(request: ImageRequest):
    """Validate that the image at ``request.image_url`` shows the expected document.

    Steps, in order: check the requested document type, download the image,
    decode it, reject blurry or dark images, run OCR, and score the text
    against the expected document type. Every outcome, including failures,
    is returned as a dict with a ``success`` flag; exceptions are converted
    into a "System error" payload. ``temp.jpg`` is removed afterwards if it
    exists.

    NOTE(review): no route decorator is visible on this function here;
    confirm how it is registered with the FastAPI app.
    NOTE(review): the download, image decoding and OCR calls are synchronous
    although this is an ``async def`` - confirm this is intended.
    """
    try:
        # --- supported document types ---
        supported_types = [
            "National ID (NIN)",
            "International Passport",
            "Driver License",
            "Voter Card",
            "Vehicle with Plate Number",
            "Utility Bill",
            "Bank Statement",
            "Tenancy Agreement",
        ]
        requested_type = request.document_type

        if requested_type not in supported_types:
            return {
                "success": False,
                "message": "Invalid document type",
                "supported_document_types": supported_types,
            }

        # --- fetch the image ---
        local_path = download_image(request.image_url)

        if local_path is None:
            return {
                "success": False,
                "message": "Image download failed",
                "reason": "Could not download image from URL.",
            }

        # --- decode the image ---
        frame = cv2.imread(local_path)

        if frame is None:
            return {
                "success": False,
                "message": "Invalid image",
                "reason": "The downloaded file could not be read as an image.",
                "suggestion": "Ensure the URL points directly to an image.",
            }

        # --- quality gates: focus first, then brightness ---
        if is_blurry(frame):
            return {
                "success": False,
                "message": "Image rejected",
                "reason": "The uploaded image is blurry.",
                "suggestion": "Retake the photo with better focus.",
            }

        if is_dark(frame):
            return {
                "success": False,
                "message": "Image rejected",
                "reason": "The uploaded image is too dark.",
                "suggestion": "Take the photo in a brighter environment.",
            }

        # --- OCR ---
        ocr_text = extract_text(local_path)

        if not ocr_text.strip():
            return {
                "success": False,
                "message": "Document rejected",
                "reason": "No readable text was detected in the image.",
                "suggestion": "Ensure the document is clear and visible.",
            }

        # --- keyword scoring ---
        verdict = validate_document_type(ocr_text, requested_type)
        preview = ocr_text[:300]

        if verdict is None:
            mismatch_reason = (
                "The uploaded image does not match the expected "
                f"document type: {requested_type}"
            )
            return {
                "success": False,
                "message": "Document rejected",
                "reason": mismatch_reason,
                "ocr_preview": preview,
                "possible_issues": [
                    "Wrong document uploaded",
                    "Image is blurry",
                    "Image is cropped",
                    "Poor lighting",
                    "Text not readable",
                    "Document partially hidden",
                ],
            }

        # --- success ---
        return {
            "success": True,
            "message": "Document verified successfully",
            "document_type": verdict["document_type"],
            "confidence": verdict["confidence"],
            "matched_keywords": verdict["matched_keywords"],
            "ocr_preview": preview,
        }

    except Exception as error:
        return {
            "success": False,
            "message": "System error",
            "reason": str(error),
        }

    finally:
        # Remove the downloaded file, whatever the outcome above.
        if os.path.exists("temp.jpg"):
            os.remove("temp.jpg")