Midnightar committed on
Commit
448497c
·
verified ·
1 Parent(s): 00f82b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -89
app.py CHANGED
@@ -1,5 +1,4 @@
1
  from fastapi import FastAPI, UploadFile, File
2
- from PIL import Image
3
  import easyocr
4
  import cv2
5
  import numpy as np
@@ -8,6 +7,10 @@ import os
8
 
9
  app = FastAPI()
10
 
 
 
 
 
11
  reader = easyocr.Reader(['en'])
12
 
13
  # =========================
@@ -15,179 +18,401 @@ reader = easyocr.Reader(['en'])
15
  # =========================
16
 
17
  def is_blurry(image):
 
18
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
19
- variance = cv2.Laplacian(gray, cv2.CV_64F).var()
20
 
21
- if variance < 100:
22
- return True
 
 
23
 
24
- return False
25
 
26
 
27
- def is_too_dark(image):
28
- brightness = np.mean(image)
29
 
30
- if brightness < 50:
31
- return True
32
 
33
- return False
34
 
35
 
36
  # =========================
37
- # OCR EXTRACTION
38
  # =========================
39
 
40
  def extract_text(image_path):
41
 
42
  results = reader.readtext(image_path)
43
 
44
- text = " ".join([r[1] for r in results]).lower()
 
 
45
 
46
  return text
47
 
48
 
49
  # =========================
50
- # DOCUMENT VALIDATION
51
  # =========================
52
 
53
  def detect_document(text):
54
 
 
55
  # NIN
56
- if (
57
- "national identification number" in text
58
- or "nin" in text
59
- ):
60
- return "nin"
61
-
62
- # Passport
63
- elif (
64
- "passport" in text
65
- or "federal republic of nigeria" in text
66
- ):
67
- return "passport"
68
-
69
- # Driver License
70
- elif (
71
- "driver" in text
72
- and "license" in text
73
- ):
74
- return "drivers_license"
75
-
76
- # Voter Card
77
- elif (
78
- "voter" in text
79
- or "inec" in text
80
- ):
81
- return "voters_card"
82
-
83
- # Utility Bill
84
- elif (
85
- "electricity" in text
86
- or "water bill" in text
87
- or "eko electric" in text
88
- or "ikeja electric" in text
89
- ):
90
- return "utility_bill"
91
-
92
- # Bank Statement
93
- elif (
94
- "account statement" in text
95
- or "transaction" in text
96
- or "balance" in text
97
- ):
98
- return "bank_statement"
99
-
100
- # Tenancy Agreement
101
- elif (
102
- "tenancy agreement" in text
103
- or "landlord" in text
104
- or "tenant" in text
105
- ):
106
- return "tenancy_agreement"
107
-
108
- # Vehicle Plate Number
109
- elif re.search(r"[A-Z]{3}-?\d{3}[A-Z]{2}", text.upper()):
110
- return "vehicle_plate"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  return None
113
 
114
 
115
  # =========================
116
- # MAIN API ENDPOINT
117
  # =========================
118
 
119
- @app.post("/validate")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
 
121
  async def validate_document(
122
  file: UploadFile = File(...)
123
  ):
124
 
125
  try:
126
 
127
- # SAVE IMAGE
128
- temp_path = "temp.jpg"
 
 
 
129
 
130
- with open(temp_path, "wb") as f:
131
  f.write(await file.read())
132
 
 
133
  # READ IMAGE
134
- image = cv2.imread(temp_path)
 
 
135
 
136
  if image is None:
 
137
  return {
138
  "success": False,
139
- "message": "Invalid image"
 
 
140
  }
141
 
142
  # =========================
143
- # IMAGE QUALITY CHECK
144
  # =========================
145
 
146
  if is_blurry(image):
 
147
  return {
148
  "success": False,
149
- "message": "Image is blurry"
 
 
150
  }
151
 
152
- if is_too_dark(image):
 
 
 
 
 
153
  return {
154
  "success": False,
155
- "message": "Image is too dark"
 
 
156
  }
157
 
158
  # =========================
159
- # OCR
160
  # =========================
161
 
162
- text = extract_text(temp_path)
 
 
 
 
163
 
164
  if len(text.strip()) == 0:
 
165
  return {
166
  "success": False,
167
- "message": "No readable text found"
 
 
168
  }
169
 
170
  # =========================
171
  # DOCUMENT DETECTION
172
  # =========================
173
 
174
- document_type = detect_document(text)
 
 
 
 
 
 
175
 
176
- if document_type is None:
177
  return {
178
  "success": False,
179
- "message": "Rejected: Unsupported document"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  }
181
 
 
 
 
 
182
  return {
183
  "success": True,
184
- "document_type": document_type,
185
- "extracted_text": text[:300]
 
 
 
 
 
186
  }
187
 
188
  except Exception as e:
189
 
190
  return {
191
  "success": False,
192
- "message": str(e)
193
- }
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, UploadFile, File
 
2
  import easyocr
3
  import cv2
4
  import numpy as np
 
7
 
# Application instance that the route decorators in this file register on.
app = FastAPI()

# =========================
# LOAD OCR MODEL
# =========================

# One EasyOCR reader, configured for English, created once at import time.
reader = easyocr.Reader(["en"])
15
 
16
  # =========================
 
18
  # =========================
19
 
def is_blurry(image, threshold=100):
    """Return True when *image* appears out of focus.

    The image is reduced to grayscale and the variance of its Laplacian is
    used as a sharpness score; a score below *threshold* counts as blurry.

    Args:
        image: Image array. Two-dimensional input is treated as already
            grayscale; anything else is converted with
            ``cv2.COLOR_BGR2GRAY``.
        threshold: Sharpness score below which the image is reported as
            blurry. Defaults to 100, the previously hard-coded cut-off.

    Returns:
        bool: True if the sharpness score is below *threshold*.
    """
    if image.ndim == 2:
        # Already single-channel; cvtColor would reject it.
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()

    # The comparison yields numpy.bool_; convert so callers get a real bool.
    return bool(sharpness < threshold)
30
 
31
 
def is_dark(image, threshold=50):
    """Return True when *image* is too dark on average.

    Args:
        image: Numeric image array; the mean is taken over every element.
        threshold: Mean value below which the image is reported as dark.
            Defaults to 50, the previously hard-coded cut-off.

    Returns:
        bool: True if the mean of *image* is below *threshold*.
    """
    brightness = np.mean(image)

    # The comparison yields numpy.bool_; convert so callers get a real bool.
    return bool(brightness < threshold)
37
 
38
 
39
  # =========================
40
+ # OCR TEXT EXTRACTION
41
  # =========================
42
 
def extract_text(image_path):
    """Run OCR on *image_path* and return the recognised text in lowercase.

    Element 1 of every entry returned by ``reader.readtext`` is taken as the
    recognised string; the strings are joined with single spaces.
    """
    detections = reader.readtext(image_path)
    fragments = (detection[1] for detection in detections)
    combined = " ".join(fragments)
    return combined.lower()
52
 
53
 
54
  # =========================
55
+ # DOCUMENT DETECTION
56
  # =========================
57
 
# Keyword rules, checked in this order; the first rule that collects at least
# ``min_hits`` keywords wins.  Each entry is
# (document_type, confidence, min_hits, keywords).
_DOCUMENT_RULES = (
    ("nin", 95, 1, (
        "national identification number",
        "nin",
    )),
    ("passport", 94, 1, (
        "passport",
        "federal republic of nigeria",
    )),
    # A driver license needs BOTH words, hence min_hits == 2.
    ("drivers_license", 92, 2, (
        "driver",
        "license",
    )),
    ("voters_card", 90, 1, (
        "voter",
        "inec",
    )),
    ("utility_bill", 88, 1, (
        "electricity",
        "water bill",
        "ikeja electric",
        "eko electric",
        "abuja electricity",
        "aedc",
    )),
    ("bank_statement", 91, 1, (
        "account statement",
        "transaction",
        "balance",
        "account number",
        "credit",
        "debit",
    )),
    ("tenancy_agreement", 89, 1, (
        "tenancy agreement",
        "landlord",
        "tenant",
        "rent",
        "property",
    )),
)

# Plate format: three letters, optional hyphen, three digits, two letters.
_PLATE_PATTERN = re.compile(r"[A-Z]{3}-?\d{3}[A-Z]{2}")


def _compile_keyword(keyword):
    """Compile *keyword* so it only matches as a whole word or phrase."""
    # The look-arounds stop "nin" from matching inside "morning" and "rent"
    # inside "current"; the optional suffix still accepts "drivers" and
    # "driver's".
    return re.compile(
        r"(?<![a-z0-9])" + re.escape(keyword) + r"(?:'?s)?(?![a-z0-9])"
    )


_KEYWORD_PATTERNS = {
    keyword: _compile_keyword(keyword)
    for _, _, _, keywords in _DOCUMENT_RULES
    for keyword in keywords
}


def detect_document(text):
    """Classify OCR text as one of the supported document types.

    Keyword rules are tried in priority order (NIN, passport, driver license,
    voter card, utility bill, bank statement, tenancy agreement); if none
    matches, the text is searched for a vehicle plate number.

    Keywords match case-insensitively and only as whole words or phrases
    (with an optional plural or possessive ending), not as substrings of
    longer words.

    Args:
        text: OCR text to classify.

    Returns:
        dict | None: ``{"document_type", "confidence", "matched_keywords"}``
        for the first rule that matches, or None when *text* is empty or
        nothing matches.
    """
    if not text:
        return None

    lowered = text.lower()

    for document_type, confidence, min_hits, keywords in _DOCUMENT_RULES:
        matched_keywords = [
            keyword
            for keyword in keywords
            if _KEYWORD_PATTERNS[keyword].search(lowered)
        ]

        if len(matched_keywords) >= min_hits:
            return {
                "document_type": document_type,
                "confidence": confidence,
                "matched_keywords": matched_keywords,
            }

    # The plate pattern is written in upper case, so search an upper-cased
    # copy of the text.
    plate_match = _PLATE_PATTERN.search(text.upper())

    if plate_match:
        return {
            "document_type": "vehicle_plate",
            "confidence": 93,
            "matched_keywords": [plate_match.group()],
        }

    return None
242
 
243
 
244
  # =========================
245
+ # HOME ROUTE
246
  # =========================
247
 
@app.get("/")
def home():
    """Return a status payload listing the supported document types."""
    supported_documents = [
        "National ID (NIN)",
        "International Passport",
        "Driver License",
        "Voter Card",
        "Vehicle with Plate Number",
        "Utility Bill",
        "Bank Statement",
        "Tenancy Agreement",
    ]

    payload = {"success": True}
    payload["message"] = "Document Validation API Running"
    payload["supported_documents"] = supported_documents
    return payload
265
+
266
+
267
+ # =========================
268
+ # VALIDATION ENDPOINT
269
+ # =========================
270
 
@app.post("/validate")
async def validate_document(
    file: UploadFile = File(...)
):
    """Validate an uploaded document image and report its detected type.

    The upload is written to a temporary file, read with OpenCV, checked for
    blur and darkness, passed through OCR, and classified with
    ``detect_document``.

    Args:
        file: The uploaded image.

    Returns:
        dict: Always contains ``success``. Rejections add ``message``,
        ``reason`` and, where relevant, ``suggestion``,
        ``supported_documents``, ``possible_issues`` and ``ocr_preview``.
        A successful result adds ``document_type``, ``confidence``,
        ``matched_keywords`` and ``ocr_preview``. Any exception is caught
        and reported as a "System error" payload instead of being raised.
    """
    # Local import keeps this block self-contained.
    import tempfile

    image_path = None

    try:

        # =========================
        # SAVE FILE
        # =========================

        payload = await file.read()

        # Each request gets its own temp file. A fixed name such as
        # "temp.jpg" lets concurrent requests overwrite or delete each
        # other's upload.
        descriptor, image_path = tempfile.mkstemp(suffix=".jpg")

        with os.fdopen(descriptor, "wb") as f:
            f.write(payload)

        # =========================
        # READ IMAGE
        # =========================

        image = cv2.imread(image_path)

        if image is None:

            return {
                "success": False,
                "message": "Invalid image",
                "reason": "The uploaded file could not be read as an image.",
                "suggestion": "Upload a valid JPG or PNG image."
            }

        # =========================
        # BLUR CHECK
        # =========================

        if is_blurry(image):

            return {
                "success": False,
                "message": "Image rejected",
                "reason": "The uploaded image is blurry.",
                "suggestion": "Retake the photo with better focus."
            }

        # =========================
        # DARK IMAGE CHECK
        # =========================

        if is_dark(image):

            return {
                "success": False,
                "message": "Image rejected",
                "reason": "The uploaded image is too dark.",
                "suggestion": "Take the photo in a brighter environment."
            }

        # =========================
        # OCR TEXT EXTRACTION
        # =========================

        text = extract_text(image_path)

        # =========================
        # NO TEXT FOUND
        # =========================

        if not text.strip():

            return {
                "success": False,
                "message": "Document rejected",
                "reason": "No readable text was detected in the image.",
                "suggestion": "Ensure the document is clear and fully visible."
            }

        # =========================
        # DOCUMENT DETECTION
        # =========================

        document_result = detect_document(text)

        # =========================
        # UNSUPPORTED DOCUMENT
        # =========================

        if document_result is None:

            return {
                "success": False,
                "message": "Document rejected",
                "reason": (
                    "The uploaded image does not match any "
                    "supported document type."
                ),
                "supported_documents": [
                    "National ID (NIN)",
                    "International Passport",
                    "Driver License",
                    "Voter Card",
                    "Vehicle with Plate Number",
                    "Utility Bill",
                    "Bank Statement",
                    "Tenancy Agreement"
                ],
                "possible_issues": [
                    "Image is cropped",
                    "Text is unreadable",
                    "Unsupported document uploaded",
                    "Poor lighting",
                    "Low image quality",
                    "Document too far from camera",
                    "Document partially hidden"
                ],
                "ocr_preview": text[:300]
            }

        # =========================
        # SUCCESS RESPONSE
        # =========================

        return {
            "success": True,
            "message": "Document verified successfully",
            "document_type": document_result["document_type"],
            "confidence": document_result["confidence"],
            "matched_keywords": (
                document_result["matched_keywords"]
            ),
            "ocr_preview": text[:300]
        }

    except Exception as e:

        return {
            "success": False,
            "message": "System error",
            "reason": str(e)
        }

    finally:

        # =========================
        # CLEAN TEMP FILE
        # =========================

        # Remove only the file this request created; image_path stays None
        # when the failure happened before the temp file existed.
        if image_path is not None and os.path.exists(image_path):
            os.remove(image_path)