Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| import easyocr | |
| import cv2 | |
| import numpy as np | |
| import re | |
| import os | |
# ASGI application object; also handed to uvicorn when run as a script.
app = FastAPI()

# =========================
# LOAD OCR MODEL
# =========================
# The reader is created once at import time and shared by every call to
# extract_text(), instead of being rebuilt for each request.
OCR_LANGUAGES = ['en']
reader = easyocr.Reader(OCR_LANGUAGES)
| # ========================= | |
| # REQUEST MODEL | |
| # ========================= | |
class ImageRequest(BaseModel):
    """Request payload naming the image that should be validated."""

    # Path string that validate_document() checks with os.path.exists and
    # then hands to cv2.imread and the OCR reader.
    image_path: str
| # ========================= | |
| # IMAGE QUALITY CHECKS | |
| # ========================= | |
def is_blurry(image, threshold=100.0):
    """Return True when *image* appears out of focus.

    Sharpness is estimated as the variance of the Laplacian of the
    grayscale image: few sharp edges give a low variance.

    Args:
        image: Image array. A colour image is converted with
            ``cv2.COLOR_BGR2GRAY`` (the channel order ``cv2.imread``
            produces); a 2-D array is treated as already grayscale.
        threshold: Variance below which the image counts as blurry.
            Defaults to 100, the value previously hard-coded here.

    Returns:
        bool: True if the Laplacian variance is strictly below *threshold*.
    """
    # A single-channel (2-D) image cannot go through BGR2GRAY, so use it
    # as-is instead of letting cvtColor raise.
    if getattr(image, "ndim", 3) == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # CV_64F keeps negative Laplacian responses instead of clipping them.
    variance = cv2.Laplacian(gray, cv2.CV_64F).var()

    # Plain bool rather than numpy.bool_, so the result is JSON-friendly.
    return bool(variance < threshold)
def is_dark(image, threshold=50.0):
    """Return True when *image* is too dark overall.

    Brightness is the arithmetic mean of every value in the array (all
    pixels and all channels).

    Args:
        image: Image array, e.g. the result of ``cv2.imread``.
        threshold: Mean value below which the image counts as dark.
            Defaults to 50, the value previously hard-coded here.

    Returns:
        bool: True if the mean is strictly below *threshold*.
    """
    brightness = np.mean(image)

    # Plain bool rather than numpy.bool_, so the result is JSON-friendly.
    return bool(brightness < threshold)
| # ========================= | |
| # OCR TEXT EXTRACTION | |
| # ========================= | |
def extract_text(image_path):
    """Run OCR on *image_path* and return the recognised text in lower case.

    The element at index 1 of every item returned by ``reader.readtext``
    is taken as that item's text; the pieces are joined with single spaces.
    """
    fragments = (detection[1] for detection in reader.readtext(image_path))
    return " ".join(fragments).lower()
| # ========================= | |
| # DOCUMENT DETECTION | |
| # ========================= | |
def _compile_keywords(keywords):
    """Pair each keyword with a compiled whole-word pattern for it."""
    # The lookarounds stop a keyword from matching inside a longer token
    # ("nin" in "evening", "rent" in "current"); the optional trailing "s"
    # keeps simple plurals such as "drivers" or "transactions" matching.
    return tuple(
        (
            keyword,
            re.compile(
                r"(?<![a-z0-9])" + re.escape(keyword) + r"s?(?![a-z0-9])"
            ),
        )
        for keyword in keywords
    )


def _match_keywords(text, compiled_keywords):
    """Return the keywords, in declaration order, that occur in *text*."""
    return [
        keyword
        for keyword, pattern in compiled_keywords
        if pattern.search(text)
    ]


# (document_type, confidence, minimum keyword hits, keywords), evaluated in
# this order; the first rule that reaches its minimum wins.
# NOTE(review): "federal republic of nigeria" counts as a passport keyword
# and passports are checked before licences and voter cards. If that phrase
# is also printed on those documents they will be reported as passports;
# confirm against real samples.
_DOCUMENT_RULES = (
    ("nin", 95, 1, _compile_keywords((
        "national identification number",
        "national identity",
        "nin",
        "nimc",
    ))),
    ("passport", 94, 1, _compile_keywords((
        "passport",
        "federal republic of nigeria",
        "nigeria passport",
    ))),
    # A lone "driver" or "license" is too weak, so two hits are required.
    ("drivers_license", 92, 2, _compile_keywords((
        "driver",
        "license",
        "drivers licence",
        "driver licence",
        "frsc",
    ))),
    ("voters_card", 90, 1, _compile_keywords((
        "voter",
        "inec",
        "permanent voter",
        "polling unit",
    ))),
    ("utility_bill", 90, 1, _compile_keywords((
        # General
        "electricity",
        "electric bill",
        "power bill",
        "meter number",
        # Nigerian DISCOs
        "ibedc",
        "ibadan electricity",
        "ikedc",
        "ikeja electric",
        "ekedc",
        "eko electric",
        "aedc",
        "abuja electricity",
        "eedc",
        "enugu electricity",
        "bedc",
        "benin electricity",
        "jed",
        "jos electricity",
        "kedco",
        "kano electricity",
        "kaedco",
        "kaduna electric",
        "phed",
        "port harcourt electricity",
        "yedc",
        "yola electricity",
        # Common terms
        "prepaid",
        "postpaid",
        "disco",
        "energy charge",
        "tariff",
    ))),
    ("bank_statement", 91, 1, _compile_keywords((
        "account statement",
        "statement of account",
        "transaction",
        "balance",
        "account number",
        "credit",
        "debit",
        "withdrawal",
        "deposit",
        # Nigerian banks
        "access bank",
        "gtbank",
        "uba",
        "zenith bank",
        "first bank",
        "opay",
        "moniepoint",
        "kuda",
        "fcmb",
        "sterling bank",
    ))),
    ("tenancy_agreement", 89, 1, _compile_keywords((
        "tenancy agreement",
        "landlord",
        "tenant",
        "rent",
        "property",
        "lease agreement",
        "rental agreement",
    ))),
)

_VEHICLE_KEYWORDS = _compile_keywords((
    "toyota",
    "honda",
    "lexus",
    "benz",
    "mercedes",
    "ford",
    "jeep",
    "hyundai",
    "kia",
    "nissan",
    "camry",
    "corolla",
    "rav4",
    "pilot",
    "highlander",
    "vehicle",
    "plate number",
))

_STATE_KEYWORDS = _compile_keywords((
    "lagos",
    "abuja",
    "kano",
    "kaduna",
    "oyo",
    "ogun",
    "ondo",
    "osun",
    "kwara",
    "imo",
    "anambra",
    "enugu",
    "rivers",
    "delta",
    "edo",
    "cross river",
    "akwa ibom",
    "bayelsa",
    "plateau",
    "benue",
    "kogi",
    "ekiti",
    "niger",
    "zamfara",
    "sokoto",
    "katsina",
    "borno",
    "yobe",
    "adamawa",
    "taraba",
    "gombe",
    "bauchi",
    "jigawa",
    "nasarawa",
    "kebbi",
    "ebonyi",
))

# Plate shapes, tried in this order against the upper-cased text. The
# lookarounds keep a plate from matching inside a longer alphanumeric run.
_PLATE_PATTERNS = tuple(
    re.compile(r"(?<![A-Z0-9])(?:" + shape + r")(?![A-Z0-9])")
    for shape in (
        r"[A-Z]{3}-?\d{3}[A-Z]{2}",
        r"[A-Z]{2}\d{3}[A-Z]{3}",
        r"[A-Z]{3}\s\d{3}\s[A-Z]{2}",
    )
)

# A lone 4-8 character alphanumeric token that matched nothing else.
_GARBAGE_TOKEN = re.compile(r"[a-z0-9]{4,8}")


def detect_document(text):
    """Classify OCR text as one of the supported document types.

    The text is lower-cased, every character other than ASCII letters,
    digits, whitespace and "-" is replaced by a space, and whitespace runs
    are collapsed. Checks then run in this order:

    1. Keyword rules (NIN, passport, driver's licence, voter card, utility
       bill, bank statement, tenancy agreement); the first rule with enough
       whole-word keyword hits wins.
    2. A number-plate pattern, reported together with any state names found.
    3. Vehicle make/model keywords.
    4. If nothing matched and the text is at most two words containing a
       4-8 character alphanumeric token, it is reported as unreadable.

    Args:
        text: Raw OCR text.

    Returns:
        dict | None: ``{"document_type", "confidence", "matched_keywords"}``
        for a recognised document; the same keys plus ``"reason"`` with
        ``document_type == "unknown"`` for unreadable text; ``None`` when
        nothing matches.
    """
    words = re.sub(r"[^a-z0-9\s-]", " ", text.lower()).split()
    if not words:
        return None

    # Single-space join so multi-word keywords match regardless of how the
    # OCR fragments were separated.
    cleaned_text = " ".join(words)

    # 1. Keyword-based document types, in priority order.
    for document_type, confidence, min_hits, keywords in _DOCUMENT_RULES:
        hits = _match_keywords(cleaned_text, keywords)
        if len(hits) >= min_hits:
            return {
                "document_type": document_type,
                "confidence": confidence,
                "matched_keywords": hits,
            }

    # 2. Number plate, reported in upper case, followed by state names.
    upper_text = cleaned_text.upper()
    for pattern in _PLATE_PATTERNS:
        plate_match = pattern.search(upper_text)
        if plate_match:
            return {
                "document_type": "vehicle_plate",
                "confidence": 97,
                "matched_keywords": (
                    [plate_match.group()]
                    + _match_keywords(cleaned_text, _STATE_KEYWORDS)
                ),
            }

    # 3. Vehicle without a readable plate.
    vehicle_hits = _match_keywords(cleaned_text, _VEHICLE_KEYWORDS)
    if vehicle_hits:
        return {
            "document_type": "vehicle_image",
            "confidence": 75,
            "matched_keywords": vehicle_hits,
        }

    # 4. Garbage check runs last: applied first, it rejected valid one-word
    #    inputs such as "passport" or a bare plate like "abc123de".
    if len(words) <= 2:
        for word in words:
            if _GARBAGE_TOKEN.fullmatch(word):
                return {
                    "document_type": "unknown",
                    "confidence": 5,
                    "matched_keywords": [word],
                    "reason": (
                        "OCR detected unreadable or "
                        "meaningless text."
                    ),
                }

    return None
| # ========================= | |
| # HOME ROUTE | |
| # ========================= | |
def home():
    """Return a status payload listing the supported document types."""
    # NOTE(review): no route decorator or other registration on `app` is
    # visible for this handler in this file - confirm it is actually exposed.
    supported = [
        "National ID (NIN)",
        "International Passport",
        "Driver License",
        "Voter Card",
        "Vehicle with Plate Number",
        "Utility Bill",
        "Bank Statement",
        "Tenancy Agreement",
    ]
    payload = {"success": True}
    payload["message"] = "Document Validation API Running"
    payload["supported_documents"] = supported
    return payload
| # ========================= | |
| # VALIDATION ENDPOINT | |
| # ========================= | |
async def validate_document(
    request: ImageRequest
):
    """Validate the image named in *request* and report what it shows.

    Steps, each returning a ``{"success": False, ...}`` payload on failure:
    the path must exist, the file must decode as an image, the image must
    be neither blurry nor too dark, OCR must find text, and the text must
    match a supported document type.

    Args:
        request: Payload whose ``image_path`` names the image to check.

    Returns:
        dict: On success ``success``, ``message``, ``document_type``,
        ``confidence``, ``matched_keywords`` and ``ocr_preview`` (first 300
        characters of OCR text). On failure ``success=False`` with
        ``message``, ``reason`` and, where relevant, extra hints.
    """
    # NOTE(review): no route decorator or other registration on `app` is
    # visible for this handler in this file - confirm it is actually exposed.
    # NOTE(review): image_path comes straight from the client and is used
    # as-is on the local filesystem; consider restricting it to an upload
    # directory.
    # NOTE(review): the OpenCV and OCR calls below are synchronous inside an
    # `async def`; confirm that blocking here is acceptable.
    try:
        image_path = request.image_path

        # =========================
        # CHECK FILE EXISTS
        # =========================
        if not os.path.exists(image_path):
            return {
                "success": False,
                "message": "Image not found",
                "reason": (
                    "The provided image path "
                    "does not exist."
                )
            }

        # =========================
        # READ IMAGE
        # =========================
        image = cv2.imread(image_path)
        if image is None:
            return {
                "success": False,
                "message": "Invalid image",
                "reason": (
                    "The file could not be "
                    "read as an image."
                ),
                "suggestion": (
                    "Provide a valid JPG or PNG image."
                )
            }

        # =========================
        # BLUR CHECK
        # =========================
        if is_blurry(image):
            return {
                "success": False,
                "message": "Image rejected",
                "reason": "The image is blurry.",
                "suggestion": (
                    "Retake the photo with "
                    "better focus."
                )
            }

        # =========================
        # DARK IMAGE CHECK
        # =========================
        if is_dark(image):
            return {
                "success": False,
                "message": "Image rejected",
                "reason": "The image is too dark.",
                "suggestion": (
                    "Use better lighting."
                )
            }

        # =========================
        # OCR TEXT EXTRACTION
        # =========================
        text = extract_text(image_path)

        # =========================
        # NO TEXT FOUND
        # =========================
        if not text.strip():
            return {
                "success": False,
                "message": "Document rejected",
                "reason": (
                    "No readable text was detected."
                ),
                "suggestion": (
                    "Ensure the document is "
                    "clear and fully visible."
                )
            }

        # =========================
        # DOCUMENT DETECTION
        # =========================
        document_result = detect_document(text)

        # =========================
        # UNSUPPORTED DOCUMENT
        # =========================
        # detect_document() can return a low-confidence "unknown" result for
        # unreadable OCR output. That is a rejection too; it used to fall
        # through and be reported as "verified successfully".
        unrecognised = (
            document_result is None
            or document_result.get("document_type") == "unknown"
        )
        if unrecognised:
            reason = (
                "The uploaded image does not "
                "match any supported document type."
            )
            if document_result is not None:
                # Prefer the detector's own explanation when it gave one.
                reason = document_result.get("reason", reason)
            return {
                "success": False,
                "message": "Document rejected",
                "reason": reason,
                "supported_documents": [
                    "National ID (NIN)",
                    "International Passport",
                    "Driver License",
                    "Voter Card",
                    "Vehicle with Plate Number",
                    "Utility Bill",
                    "Bank Statement",
                    "Tenancy Agreement"
                ],
                "possible_issues": [
                    "Image is cropped",
                    "Text is unreadable",
                    "Unsupported document uploaded",
                    "Poor lighting",
                    "Low image quality",
                    "Document too far from camera",
                    "Document partially hidden"
                ],
                "ocr_preview": text[:300]
            }

        # =========================
        # SUCCESS RESPONSE
        # =========================
        return {
            "success": True,
            "message": "Document verified successfully",
            "document_type": document_result["document_type"],
            "confidence": document_result["confidence"],
            "matched_keywords": document_result["matched_keywords"],
            "ocr_preview": text[:300]
        }
    except Exception as e:
        # Top-level boundary: turn any unexpected failure into an error
        # payload instead of letting it propagate to the caller.
        return {
            "success": False,
            "message": "System error",
            "reason": str(e)
        }
| # ========================= | |
| # RUN SERVER | |
| # ========================= | |
if __name__ == "__main__":
    # uvicorn is only needed when the module is launched directly.
    import uvicorn

    # Listen on every interface, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)