Spaces:

Midnightar
/

document-validator

Sleeping

App Files Community

Midnightar committed 14 days ago

Commit

d86e67c

verified ·

1 Parent(s): 968cf82

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -117

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
 import easyocr
 import cv2
 import numpy as np
@@ -14,36 +13,18 @@ app = FastAPI()
 reader = easyocr.Reader(['en'])
-# =========================
-# REQUEST MODEL
-# =========================
-class ImageRequest(BaseModel):
-    image_path: str
 # =========================
 # IMAGE QUALITY CHECKS
 # =========================
 def is_blurry(image):
-    gray = cv2.cvtColor(
-        image,
-        cv2.COLOR_BGR2GRAY
-    )
-    variance = cv2.Laplacian(
-        gray,
-        cv2.CV_64F
-    ).var()
     return variance < 100
 def is_dark(image):
     brightness = np.mean(image)
     return brightness < 50
@@ -52,13 +33,8 @@ def is_dark(image):
 # =========================
 def extract_text(image_path):
     results = reader.readtext(image_path)
-    text = " ".join(
-        [r[1] for r in results]
-    ).lower()
     return text
@@ -72,11 +48,7 @@ def detect_document(text):
     text = text.lower().strip()
     # REMOVE EXTRA SYMBOLS
-    cleaned_text = re.sub(
-        r'[^a-zA-Z0-9\s-]',
-        ' ',
-        text
-    )
     # SPLIT WORDS
     words = cleaned_text.split()
@@ -90,11 +62,8 @@ def detect_document(text):
     ]
     for pattern in garbage_patterns:
         for word in words:
             if re.match(pattern, word):
                 if len(words) <= 2:
                     return {
                         "document_type": "unknown",
@@ -120,12 +89,10 @@ def detect_document(text):
     matched_keywords = []
     for keyword in nin_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "nin",
             "confidence": 95,
@@ -145,12 +112,10 @@ def detect_document(text):
     matched_keywords = []
     for keyword in passport_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "passport",
             "confidence": 94,
@@ -172,12 +137,10 @@ def detect_document(text):
     matched_keywords = []
     for keyword in license_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) >= 2:
         return {
             "document_type": "drivers_license",
             "confidence": 92,
@@ -198,12 +161,10 @@ def detect_document(text):
     matched_keywords = []
     for keyword in voter_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "voters_card",
             "confidence": 90,
@@ -211,7 +172,7 @@ def detect_document(text):
         }
     # =========================
-    # ELECTRICITY / UTILITY BILL
     # =========================
     electricity_keywords = [
@@ -246,7 +207,7 @@ def detect_document(text):
         "yedc",
         "yola electricity",
-        # Common terms
         "prepaid",
         "postpaid",
         "disco",
@@ -257,12 +218,10 @@ def detect_document(text):
     matched_keywords = []
     for keyword in electricity_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "utility_bill",
             "confidence": 90,
@@ -274,7 +233,6 @@ def detect_document(text):
     # =========================
     bank_keywords = [
         "account statement",
         "statement of account",
         "transaction",
@@ -301,12 +259,10 @@ def detect_document(text):
     matched_keywords = []
     for keyword in bank_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "bank_statement",
             "confidence": 91,
@@ -318,7 +274,6 @@ def detect_document(text):
     # =========================
     tenancy_keywords = [
         "tenancy agreement",
         "landlord",
         "tenant",
@@ -331,12 +286,10 @@ def detect_document(text):
     matched_keywords = []
     for keyword in tenancy_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "tenancy_agreement",
             "confidence": 89,
@@ -348,7 +301,6 @@ def detect_document(text):
     # =========================
     vehicle_keywords = [
         "toyota",
         "honda",
         "lexus",
@@ -371,7 +323,6 @@ def detect_document(text):
     matched_keywords = []
     for keyword in vehicle_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
@@ -380,7 +331,6 @@ def detect_document(text):
     # =========================
     nigeria_states = [
         "lagos",
         "abuja",
         "kano",
@@ -422,7 +372,6 @@ def detect_document(text):
     state_matches = []
     for state in nigeria_states:
         if state in cleaned_text:
             state_matches.append(state)
@@ -431,7 +380,6 @@ def detect_document(text):
     # =========================
     plate_patterns = [
         r"[A-Z]{3}-?\d{3}[A-Z]{2}",
         r"[A-Z]{2}\d{3}[A-Z]{3}",
         r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"
@@ -440,16 +388,10 @@ def detect_document(text):
     detected_plate = None
     for pattern in plate_patterns:
-        plate_match = re.search(
-            pattern,
-            cleaned_text.upper()
-        )
         if plate_match:
             detected_plate = plate_match.group()
             break
     # =========================
@@ -457,18 +399,14 @@ def detect_document(text):
     # =========================
     if detected_plate:
         return {
             "document_type": "vehicle_plate",
             "confidence": 97,
-            "matched_keywords": [
-                detected_plate
-            ] + state_matches
         }
     # VEHICLE WITHOUT CLEAR PLATE
     if len(matched_keywords) > 0:
         return {
             "document_type": "vehicle_image",
             "confidence": 75,
@@ -488,7 +426,6 @@ def detect_document(text):
 @app.get("/")
 def home():
     return {
         "success": True,
         "message": "Document Validation API Running",
@@ -510,28 +447,18 @@ def home():
 # =========================
 @app.post("/validate")
-async def validate_document(
-    request: ImageRequest
-):
     try:
-        image_path = request.image_path
         # =========================
-        # CHECK FILE EXISTS
         # =========================
-        if not os.path.exists(image_path):
-            return {
-                "success": False,
-                "message": "Image not found",
-                "reason": (
-                    "The provided image path "
-                    "does not exist."
-                )
-            }
         # =========================
         # READ IMAGE
@@ -540,16 +467,15 @@ async def validate_document(
         image = cv2.imread(image_path)
         if image is None:
             return {
                 "success": False,
                 "message": "Invalid image",
                 "reason": (
-                    "The file could not be "
-                    "read as an image."
                 ),
                 "suggestion": (
-                    "Provide a valid JPG or PNG image."
                 )
             }
@@ -558,14 +484,12 @@ async def validate_document(
         # =========================
         if is_blurry(image):
             return {
                 "success": False,
                 "message": "Image rejected",
-                "reason": "The image is blurry.",
                 "suggestion": (
-                    "Retake the photo with "
-                    "better focus."
                 )
             }
@@ -574,13 +498,12 @@ async def validate_document(
         # =========================
         if is_dark(image):
             return {
                 "success": False,
                 "message": "Image rejected",
-                "reason": "The image is too dark.",
                 "suggestion": (
-                    "Use better lighting."
                 )
             }
@@ -595,16 +518,16 @@ async def validate_document(
         # =========================
         if len(text.strip()) == 0:
             return {
                 "success": False,
                 "message": "Document rejected",
                 "reason": (
-                    "No readable text was detected."
                 ),
                 "suggestion": (
-                    "Ensure the document is "
-                    "clear and fully visible."
                 )
             }
@@ -619,13 +542,12 @@ async def validate_document(
         # =========================
         if document_result is None:
             return {
                 "success": False,
                 "message": "Document rejected",
                 "reason": (
-                    "The uploaded image does not "
-                    "match any supported document type."
                 ),
                 "supported_documents": [
                     "National ID (NIN)",
@@ -676,17 +598,11 @@ async def validate_document(
             "reason": str(e)
         }
-# =========================
-# RUN SERVER
-# =========================
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(
-        app,
-        host="0.0.0.0",
-        port=7860
-    )

+from fastapi import FastAPI, UploadFile, File
 import easyocr
 import cv2
 import numpy as np
 reader = easyocr.Reader(['en'])
 # =========================
 # IMAGE QUALITY CHECKS
 # =========================
 def is_blurry(image):
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    variance = cv2.Laplacian(gray, cv2.CV_64F).var()
     return variance < 100
 def is_dark(image):
     brightness = np.mean(image)
     return brightness < 50
 # =========================
 def extract_text(image_path):
     results = reader.readtext(image_path)
+    text = " ".join([r[1] for r in results]).lower()
     return text
     text = text.lower().strip()
     # REMOVE EXTRA SYMBOLS
+    cleaned_text = re.sub(r'[^a-zA-Z0-9\s-]', ' ', text)
     # SPLIT WORDS
     words = cleaned_text.split()
     ]
     for pattern in garbage_patterns:
         for word in words:
             if re.match(pattern, word):
                 if len(words) <= 2:
                     return {
                         "document_type": "unknown",
     matched_keywords = []
     for keyword in nin_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "nin",
             "confidence": 95,
     matched_keywords = []
     for keyword in passport_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "passport",
             "confidence": 94,
     matched_keywords = []
     for keyword in license_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) >= 2:
         return {
             "document_type": "drivers_license",
             "confidence": 92,
     matched_keywords = []
     for keyword in voter_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "voters_card",
             "confidence": 90,
         }
     # =========================
+    # ELECTRICITY COMPANIES
     # =========================
     electricity_keywords = [
         "yedc",
         "yola electricity",
+        # Common Nigerian utility terms
         "prepaid",
         "postpaid",
         "disco",
     matched_keywords = []
     for keyword in electricity_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "utility_bill",
             "confidence": 90,
     # =========================
     bank_keywords = [
         "account statement",
         "statement of account",
         "transaction",
     matched_keywords = []
     for keyword in bank_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "bank_statement",
             "confidence": 91,
     # =========================
     tenancy_keywords = [
         "tenancy agreement",
         "landlord",
         "tenant",
     matched_keywords = []
     for keyword in tenancy_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     if len(matched_keywords) > 0:
         return {
             "document_type": "tenancy_agreement",
             "confidence": 89,
     # =========================
     vehicle_keywords = [
         "toyota",
         "honda",
         "lexus",
     matched_keywords = []
     for keyword in vehicle_keywords:
         if keyword in cleaned_text:
             matched_keywords.append(keyword)
     # =========================
     nigeria_states = [
         "lagos",
         "abuja",
         "kano",
     state_matches = []
     for state in nigeria_states:
         if state in cleaned_text:
             state_matches.append(state)
     # =========================
     plate_patterns = [
         r"[A-Z]{3}-?\d{3}[A-Z]{2}",
         r"[A-Z]{2}\d{3}[A-Z]{3}",
         r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"
     detected_plate = None
     for pattern in plate_patterns:
+        plate_match = re.search(pattern, cleaned_text.upper())
         if plate_match:
             detected_plate = plate_match.group()
             break
     # =========================
     # =========================
     if detected_plate:
         return {
             "document_type": "vehicle_plate",
             "confidence": 97,
+            "matched_keywords": [detected_plate] + state_matches
         }
     # VEHICLE WITHOUT CLEAR PLATE
     if len(matched_keywords) > 0:
         return {
             "document_type": "vehicle_image",
             "confidence": 75,
 @app.get("/")
 def home():
     return {
         "success": True,
         "message": "Document Validation API Running",
 # =========================
 @app.post("/validate")
+async def validate_document(file: UploadFile = File(...)):
     try:
         # =========================
+        # SAVE FILE
         # =========================
+        image_path = "temp.jpg"
+        with open(image_path, "wb") as f:
+            f.write(await file.read())
         # =========================
         # READ IMAGE
         image = cv2.imread(image_path)
         if image is None:
             return {
                 "success": False,
                 "message": "Invalid image",
                 "reason": (
+                    "The uploaded file could not "
+                    "be read as an image."
                 ),
                 "suggestion": (
+                    "Upload a valid JPG or PNG image."
                 )
             }
         # =========================
         if is_blurry(image):
             return {
                 "success": False,
                 "message": "Image rejected",
+                "reason": "The uploaded image is blurry.",
                 "suggestion": (
+                    "Retake the photo with better focus."
                 )
             }
         # =========================
         if is_dark(image):
             return {
                 "success": False,
                 "message": "Image rejected",
+                "reason": "The uploaded image is too dark.",
                 "suggestion": (
+                    "Take the photo in a brighter environment."
                 )
             }
         # =========================
         if len(text.strip()) == 0:
             return {
                 "success": False,
                 "message": "Document rejected",
                 "reason": (
+                    "No readable text was detected "
+                    "in the image."
                 ),
                 "suggestion": (
+                    "Ensure the document is clear "
+                    "and fully visible."
                 )
             }
         # =========================
         if document_result is None:
             return {
                 "success": False,
                 "message": "Document rejected",
                 "reason": (
+                    "The uploaded image does not match "
+                    "any supported document type."
                 ),
                 "supported_documents": [
                     "National ID (NIN)",
             "reason": str(e)
         }
+    finally:
+        # =========================
+        # CLEAN TEMP FILE
+        # =========================
+        if os.path.exists("temp.jpg"):
+            os.remove("temp.jpg")