Midnightar committed on
Commit
419ca28
·
verified ·
1 Parent(s): a952476

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +307 -350
app.py CHANGED
@@ -21,6 +21,7 @@ reader = easyocr.Reader(['en'])
21
 
22
  class ImageRequest(BaseModel):
23
  image_url: str
 
24
 
25
  # =========================
26
  # DOWNLOAD IMAGE
@@ -73,7 +74,6 @@ def is_dark(image):
73
 
74
  return brightness < 50
75
 
76
-
77
  # =========================
78
  # OCR TEXT EXTRACTION
79
  # =========================
@@ -88,16 +88,14 @@ def extract_text(image_path):
88
 
89
  return text
90
 
91
-
92
  # =========================
93
- # DOCUMENT DETECTION
94
  # =========================
95
 
96
- def detect_document(text):
97
-
98
- # =========================
99
- # CLEAN TEXT
100
- # =========================
101
 
102
  text = text.lower().strip()
103
 
@@ -107,411 +105,349 @@ def detect_document(text):
107
  text
108
  )
109
 
110
- words = cleaned_text.split()
111
-
112
- # =========================
113
- # SCORE SYSTEM
114
- # =========================
115
-
116
- scores = {
117
- "nin": 0,
118
- "passport": 0,
119
- "drivers_license": 0,
120
- "voters_card": 0,
121
- "utility_bill": 0,
122
- "bank_statement": 0,
123
- "tenancy_agreement": 0,
124
- "vehicle_plate": 0,
125
- "vehicle_image": 0
126
- }
127
 
128
- matched_keywords = {
129
- "nin": [],
130
- "passport": [],
131
- "drivers_license": [],
132
- "voters_card": [],
133
- "utility_bill": [],
134
- "bank_statement": [],
135
- "tenancy_agreement": [],
136
- "vehicle_plate": [],
137
- "vehicle_image": []
138
- }
139
 
140
  # =========================
141
- # GARBAGE OCR DETECTION
142
  # =========================
143
 
144
- garbage_patterns = [
145
- r'^[a-z0-9]{4,8}$'
146
- ]
 
 
 
 
 
147
 
148
- garbage_count = 0
149
 
150
- for pattern in garbage_patterns:
151
 
152
- for word in words:
153
 
154
- if re.match(pattern, word):
155
- garbage_count += 1
156
 
157
  # =========================
158
- # NIN
159
  # =========================
160
 
161
- nin_keywords = [
162
- "national identification number",
163
- "national identity",
164
- "nimc"
165
- ]
166
 
167
- for keyword in nin_keywords:
 
 
 
 
 
168
 
169
- if keyword in cleaned_text:
170
 
171
- scores["nin"] += 5
172
- matched_keywords["nin"].append(keyword)
173
 
174
- # Weak standalone nin
175
- if " nin " in f" {cleaned_text} ":
176
 
177
- scores["nin"] += 1
178
- matched_keywords["nin"].append("nin")
179
 
180
  # =========================
181
- # PASSPORT
182
  # =========================
183
 
184
- passport_keywords = [
185
- "passport",
186
- "federal republic of nigeria",
187
- "nigeria passport",
188
- "international passport"
189
- ]
190
-
191
- for keyword in passport_keywords:
192
 
193
- if keyword in cleaned_text:
194
-
195
- scores["passport"] += 5
196
- matched_keywords["passport"].append(keyword)
197
-
198
- # =========================
199
- # DRIVER LICENSE
200
- # =========================
201
 
202
- license_keywords = [
203
- "driver",
204
- "license",
205
- "drivers licence",
206
- "driver licence",
207
- "frsc"
208
- ]
209
 
210
- for keyword in license_keywords:
211
 
212
- if keyword in cleaned_text:
213
 
214
- scores["drivers_license"] += 3
215
- matched_keywords["drivers_license"].append(keyword)
216
 
217
  # =========================
218
  # VOTER CARD
219
  # =========================
220
 
221
- voter_keywords = [
222
- "voter",
223
- "inec",
224
- "permanent voter",
225
- "polling unit"
226
- ]
 
 
 
 
227
 
228
- for keyword in voter_keywords:
229
 
230
- if keyword in cleaned_text:
231
 
232
- scores["voters_card"] += 4
233
- matched_keywords["voters_card"].append(keyword)
234
 
235
  # =========================
236
  # UTILITY BILL
237
  # =========================
238
 
239
- electricity_keywords = [
 
 
240
 
241
- # General
242
- "electricity",
243
- "electric bill",
244
- "power bill",
245
- "meter number",
246
- "meter no",
247
- "token",
248
- "kwh",
249
- "prepaid",
250
- "postpaid",
251
- "energy charge",
252
- "tariff",
253
 
254
- # Nigerian DISCOs
255
- "ibedc",
256
- "ibadan electricity",
257
 
258
- "ikedc",
259
- "ikeja electric",
260
 
261
- "ekedc",
262
- "eko electric",
263
 
264
- "aedc",
265
- "abuja electricity",
266
 
267
- "eedc",
268
- "enugu electricity",
269
 
270
- "bedc",
271
- "benin electricity",
272
 
273
- "jed",
274
- "jos electricity",
275
 
276
- "kedco",
277
- "kano electricity",
278
 
279
- "kaedco",
280
- "kaduna electric",
281
 
282
- "phed",
283
- "port harcourt electricity",
284
 
285
- "yedc",
286
- "yola electricity"
287
- ]
288
 
289
- for keyword in electricity_keywords:
290
 
291
- if keyword in cleaned_text:
292
 
293
- scores["utility_bill"] += 4
294
- matched_keywords["utility_bill"].append(keyword)
 
295
 
296
  # =========================
297
  # BANK STATEMENT
298
  # =========================
299
 
300
- bank_keywords = [
301
-
302
- "account statement",
303
- "statement of account",
304
- "transaction",
305
- "balance",
306
- "account number",
307
- "credit",
308
- "debit",
309
- "withdrawal",
310
- "deposit",
311
-
312
- # Nigerian Banks
313
- "access bank",
314
- "gtbank",
315
- "uba",
316
- "zenith bank",
317
- "first bank",
318
- "opay",
319
- "moniepoint",
320
- "kuda",
321
- "fcmb",
322
- "sterling bank",
323
- "wema bank",
324
- "providus",
325
- "fidelity bank",
326
- "union bank"
327
- ]
328
-
329
- for keyword in bank_keywords:
330
-
331
- if keyword in cleaned_text:
332
-
333
- scores["bank_statement"] += 3
334
- matched_keywords["bank_statement"].append(keyword)
 
 
 
335
 
336
  # =========================
337
  # TENANCY AGREEMENT
338
  # =========================
339
 
340
- tenancy_keywords = [
341
- "tenancy agreement",
342
- "landlord",
343
- "tenant",
344
- "rent",
345
- "property",
346
- "lease agreement",
347
- "rental agreement"
348
- ]
349
 
350
- for keyword in tenancy_keywords:
 
 
 
 
 
 
 
 
351
 
352
- if keyword in cleaned_text:
353
 
354
- scores["tenancy_agreement"] += 3
355
- matched_keywords["tenancy_agreement"].append(keyword)
356
 
357
- # =========================
358
- # VEHICLE KEYWORDS
359
- # =========================
360
 
361
- vehicle_keywords = [
362
- "toyota",
363
- "honda",
364
- "lexus",
365
- "benz",
366
- "mercedes",
367
- "ford",
368
- "jeep",
369
- "hyundai",
370
- "kia",
371
- "nissan",
372
- "camry",
373
- "corolla",
374
- "rav4",
375
- "pilot",
376
- "highlander",
377
- "vehicle",
378
- "plate number"
379
- ]
380
-
381
- for keyword in vehicle_keywords:
382
-
383
- if keyword in cleaned_text:
384
-
385
- scores["vehicle_image"] += 3
386
- matched_keywords["vehicle_image"].append(keyword)
387
 
388
  # =========================
389
- # NIGERIAN STATES
390
  # =========================
391
 
392
- nigeria_states = [
393
- "lagos",
394
- "abuja",
395
- "kano",
396
- "kaduna",
397
- "oyo",
398
- "ogun",
399
- "ondo",
400
- "osun",
401
- "kwara",
402
- "imo",
403
- "anambra",
404
- "enugu",
405
- "rivers",
406
- "delta",
407
- "edo",
408
- "cross river",
409
- "akwa ibom",
410
- "bayelsa",
411
- "plateau",
412
- "benue",
413
- "kogi",
414
- "ekiti",
415
- "niger",
416
- "zamfara",
417
- "sokoto",
418
- "katsina",
419
- "borno",
420
- "yobe",
421
- "adamawa",
422
- "taraba",
423
- "gombe",
424
- "bauchi",
425
- "jigawa",
426
- "nasarawa",
427
- "kebbi",
428
- "ebonyi"
429
- ]
430
-
431
- for state in nigeria_states:
432
-
433
- if state in cleaned_text:
434
-
435
- scores["vehicle_plate"] += 1
436
- matched_keywords["vehicle_plate"].append(state)
437
 
438
- # =========================
439
- # NIGERIAN PLATE PATTERNS
440
- # =========================
441
 
442
- plate_patterns = [
443
- r"[A-Z]{3}-?\d{3}[A-Z]{2}",
444
- r"[A-Z]{2}\d{3}[A-Z]{3}",
445
- r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"
446
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
- for pattern in plate_patterns:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
 
450
- plate_match = re.search(
451
- pattern,
452
- cleaned_text.upper()
453
- )
454
 
455
- if plate_match:
456
 
457
- scores["vehicle_plate"] += 10
458
 
459
- matched_keywords[
460
- "vehicle_plate"
461
- ].append(
462
- plate_match.group()
463
- )
464
 
465
- # =========================
466
- # OCR GARBAGE PENALTY
467
- # =========================
468
 
469
- if garbage_count >= 5:
 
 
 
 
470
 
471
- for key in scores:
472
- scores[key] -= 2
473
 
474
- # =========================
475
- # BEST MATCH
476
- # =========================
 
477
 
478
- best_doc = max(
479
- scores,
480
- key=scores.get
481
- )
482
 
483
- best_score = scores[best_doc]
 
 
 
 
484
 
485
  # =========================
486
  # LOW CONFIDENCE
487
  # =========================
488
 
489
- if best_score <= 0:
490
- return None
491
-
492
- # =========================
493
- # CONFIDENCE
494
- # =========================
495
 
496
- confidence = min(
497
- 99,
498
- max(50, best_score * 5)
499
- )
500
 
501
- # =========================
502
- # RETURN RESULT
503
- # =========================
504
 
505
  return {
506
- "document_type": best_doc,
507
  "confidence": confidence,
508
- "matched_keywords": (
509
- matched_keywords[best_doc]
510
- ),
511
- "all_scores": scores
512
  }
513
 
514
-
515
  # =========================
516
  # HOME ROUTE
517
  # =========================
@@ -534,7 +470,6 @@ def home():
534
  ]
535
  }
536
 
537
-
538
  # =========================
539
  # VALIDATION ENDPOINT
540
  # =========================
@@ -546,6 +481,32 @@ async def validate_document(
546
 
547
  try:
548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
549
  # =========================
550
  # DOWNLOAD IMAGE
551
  # =========================
@@ -577,8 +538,9 @@ async def validate_document(
577
  "success": False,
578
  "message": "Invalid image",
579
  "reason": (
580
- "The downloaded file could "
581
- "not be read as an image."
 
582
  ),
583
  "suggestion": (
584
  "Ensure the URL points "
@@ -596,7 +558,8 @@ async def validate_document(
596
  "success": False,
597
  "message": "Image rejected",
598
  "reason": (
599
- "The uploaded image is blurry."
 
600
  ),
601
  "suggestion": (
602
  "Retake the photo "
@@ -605,7 +568,7 @@ async def validate_document(
605
  }
606
 
607
  # =========================
608
- # DARK IMAGE CHECK
609
  # =========================
610
 
611
  if is_dark(image):
@@ -614,7 +577,8 @@ async def validate_document(
614
  "success": False,
615
  "message": "Image rejected",
616
  "reason": (
617
- "The uploaded image is too dark."
 
618
  ),
619
  "suggestion": (
620
  "Take the photo in a "
@@ -623,7 +587,7 @@ async def validate_document(
623
  }
624
 
625
  # =========================
626
- # OCR TEXT EXTRACTION
627
  # =========================
628
 
629
  text = extract_text(image_path)
@@ -638,8 +602,9 @@ async def validate_document(
638
  "success": False,
639
  "message": "Document rejected",
640
  "reason": (
641
- "No readable text was "
642
- "detected in the image."
 
643
  ),
644
  "suggestion": (
645
  "Ensure the document "
@@ -648,15 +613,16 @@ async def validate_document(
648
  }
649
 
650
  # =========================
651
- # DOCUMENT DETECTION
652
  # =========================
653
 
654
- document_result = detect_document(
655
- text
 
656
  )
657
 
658
  # =========================
659
- # UNSUPPORTED DOCUMENT
660
  # =========================
661
 
662
  if document_result is None:
@@ -665,30 +631,21 @@ async def validate_document(
665
  "success": False,
666
  "message": "Document rejected",
667
  "reason": (
668
- "The uploaded image "
669
- "does not match any "
670
- "supported document type."
 
 
671
  ),
672
- "supported_documents": [
673
- "National ID (NIN)",
674
- "International Passport",
675
- "Driver License",
676
- "Voter Card",
677
- "Vehicle with Plate Number",
678
- "Utility Bill",
679
- "Bank Statement",
680
- "Tenancy Agreement"
681
- ],
682
  "possible_issues": [
 
 
683
  "Image is cropped",
684
- "Text is unreadable",
685
- "Unsupported document uploaded",
686
  "Poor lighting",
687
- "Low image quality",
688
- "Document too far from camera",
689
  "Document partially hidden"
690
- ],
691
- "ocr_preview": text[:300]
692
  }
693
 
694
  # =========================
@@ -697,7 +654,9 @@ async def validate_document(
697
 
698
  return {
699
  "success": True,
700
- "message": "Document verified successfully",
 
 
701
  "document_type": (
702
  document_result["document_type"]
703
  ),
@@ -707,9 +666,6 @@ async def validate_document(
707
  "matched_keywords": (
708
  document_result["matched_keywords"]
709
  ),
710
- "score_breakdown": (
711
- document_result["all_scores"]
712
- ),
713
  "ocr_preview": text[:300]
714
  }
715
 
@@ -728,4 +684,5 @@ async def validate_document(
728
  # =========================
729
 
730
  if os.path.exists("temp.jpg"):
 
731
  os.remove("temp.jpg")
 
21
 
22
  class ImageRequest(BaseModel):
23
  image_url: str
24
+ document_type: str
25
 
26
  # =========================
27
  # DOWNLOAD IMAGE
 
74
 
75
  return brightness < 50
76
 
 
77
  # =========================
78
  # OCR TEXT EXTRACTION
79
  # =========================
 
88
 
89
  return text
90
 
 
91
  # =========================
92
+ # DOCUMENT VALIDATION
93
  # =========================
94
 
95
+ def validate_document_type(
96
+ text,
97
+ document_type
98
+ ):
 
99
 
100
  text = text.lower().strip()
101
 
 
105
  text
106
  )
107
 
108
+ matched_keywords = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
+ confidence = 0
 
 
 
 
 
 
 
 
 
 
111
 
112
  # =========================
113
+ # NATIONAL ID (NIN)
114
  # =========================
115
 
116
+ if document_type == "National ID (NIN)":
117
+
118
+ keywords = [
119
+ "national identification number",
120
+ "national identity",
121
+ "nimc",
122
+ "nin"
123
+ ]
124
 
125
+ for keyword in keywords:
126
 
127
+ if keyword in cleaned_text:
128
 
129
+ matched_keywords.append(keyword)
130
 
131
+ confidence += 25
 
132
 
133
  # =========================
134
+ # INTERNATIONAL PASSPORT
135
  # =========================
136
 
137
+ elif document_type == "International Passport":
 
 
 
 
138
 
139
+ keywords = [
140
+ "passport",
141
+ "federal republic of nigeria",
142
+ "nigeria passport",
143
+ "international passport"
144
+ ]
145
 
146
+ for keyword in keywords:
147
 
148
+ if keyword in cleaned_text:
 
149
 
150
+ matched_keywords.append(keyword)
 
151
 
152
+ confidence += 25
 
153
 
154
  # =========================
155
+ # DRIVER LICENSE
156
  # =========================
157
 
158
+ elif document_type == "Driver License":
 
 
 
 
 
 
 
159
 
160
+ keywords = [
161
+ "driver",
162
+ "license",
163
+ "drivers licence",
164
+ "driver licence",
165
+ "frsc"
166
+ ]
 
167
 
168
+ for keyword in keywords:
 
 
 
 
 
 
169
 
170
+ if keyword in cleaned_text:
171
 
172
+ matched_keywords.append(keyword)
173
 
174
+ confidence += 20
 
175
 
176
  # =========================
177
  # VOTER CARD
178
  # =========================
179
 
180
+ elif document_type == "Voter Card":
181
+
182
+ keywords = [
183
+ "voter",
184
+ "inec",
185
+ "permanent voter",
186
+ "polling unit"
187
+ ]
188
+
189
+ for keyword in keywords:
190
 
191
+ if keyword in cleaned_text:
192
 
193
+ matched_keywords.append(keyword)
194
 
195
+ confidence += 25
 
196
 
197
  # =========================
198
  # UTILITY BILL
199
  # =========================
200
 
201
+ elif document_type == "Utility Bill":
202
+
203
+ keywords = [
204
 
205
+ # General
206
+ "electricity",
207
+ "electric bill",
208
+ "power bill",
209
+ "meter number",
210
+ "meter no",
211
+ "token",
212
+ "kwh",
213
+ "prepaid",
214
+ "postpaid",
215
+ "energy charge",
216
+ "tariff",
217
 
218
+ # Nigerian DISCOs
219
+ "ibedc",
220
+ "ibadan electricity",
221
 
222
+ "ikedc",
223
+ "ikeja electric",
224
 
225
+ "ekedc",
226
+ "eko electric",
227
 
228
+ "aedc",
229
+ "abuja electricity",
230
 
231
+ "eedc",
232
+ "enugu electricity",
233
 
234
+ "bedc",
235
+ "benin electricity",
236
 
237
+ "jed",
238
+ "jos electricity",
239
 
240
+ "kedco",
241
+ "kano electricity",
242
 
243
+ "kaedco",
244
+ "kaduna electric",
245
 
246
+ "phed",
247
+ "port harcourt electricity",
248
 
249
+ "yedc",
250
+ "yola electricity"
251
+ ]
252
 
253
+ for keyword in keywords:
254
 
255
+ if keyword in cleaned_text:
256
 
257
+ matched_keywords.append(keyword)
258
+
259
+ confidence += 15
260
 
261
  # =========================
262
  # BANK STATEMENT
263
  # =========================
264
 
265
+ elif document_type == "Bank Statement":
266
+
267
+ keywords = [
268
+
269
+ "account statement",
270
+ "statement of account",
271
+ "transaction",
272
+ "balance",
273
+ "account number",
274
+ "credit",
275
+ "debit",
276
+ "withdrawal",
277
+ "deposit",
278
+
279
+ # Nigerian Banks
280
+ "access bank",
281
+ "gtbank",
282
+ "uba",
283
+ "zenith bank",
284
+ "first bank",
285
+ "opay",
286
+ "moniepoint",
287
+ "kuda",
288
+ "fcmb",
289
+ "sterling bank",
290
+ "wema bank",
291
+ "providus",
292
+ "fidelity bank",
293
+ "union bank"
294
+ ]
295
+
296
+ for keyword in keywords:
297
+
298
+ if keyword in cleaned_text:
299
+
300
+ matched_keywords.append(keyword)
301
+
302
+ confidence += 15
303
 
304
  # =========================
305
  # TENANCY AGREEMENT
306
  # =========================
307
 
308
+ elif document_type == "Tenancy Agreement":
 
 
 
 
 
 
 
 
309
 
310
+ keywords = [
311
+ "tenancy agreement",
312
+ "landlord",
313
+ "tenant",
314
+ "rent",
315
+ "property",
316
+ "lease agreement",
317
+ "rental agreement"
318
+ ]
319
 
320
+ for keyword in keywords:
321
 
322
+ if keyword in cleaned_text:
 
323
 
324
+ matched_keywords.append(keyword)
 
 
325
 
326
+ confidence += 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
  # =========================
329
+ # VEHICLE WITH PLATE NUMBER
330
  # =========================
331
 
332
+ elif document_type == "Vehicle with Plate Number":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
+ vehicle_keywords = [
 
 
335
 
336
+ "toyota",
337
+ "honda",
338
+ "lexus",
339
+ "benz",
340
+ "mercedes",
341
+ "ford",
342
+ "jeep",
343
+ "hyundai",
344
+ "kia",
345
+ "nissan",
346
+ "camry",
347
+ "corolla",
348
+ "rav4",
349
+ "pilot",
350
+ "highlander",
351
+ "vehicle",
352
+ "plate number"
353
+ ]
354
 
355
+ for keyword in vehicle_keywords:
356
+
357
+ if keyword in cleaned_text:
358
+
359
+ matched_keywords.append(keyword)
360
+
361
+ confidence += 10
362
+
363
+ # Nigerian states
364
+
365
+ nigeria_states = [
366
+ "lagos",
367
+ "abuja",
368
+ "kano",
369
+ "kaduna",
370
+ "oyo",
371
+ "ogun",
372
+ "ondo",
373
+ "osun",
374
+ "kwara",
375
+ "imo",
376
+ "anambra",
377
+ "enugu",
378
+ "rivers",
379
+ "delta",
380
+ "edo",
381
+ "cross river",
382
+ "akwa ibom",
383
+ "bayelsa",
384
+ "plateau",
385
+ "benue",
386
+ "kogi",
387
+ "ekiti",
388
+ "niger",
389
+ "zamfara",
390
+ "sokoto",
391
+ "katsina",
392
+ "borno",
393
+ "yobe",
394
+ "adamawa",
395
+ "taraba",
396
+ "gombe",
397
+ "bauchi",
398
+ "jigawa",
399
+ "nasarawa",
400
+ "kebbi",
401
+ "ebonyi"
402
+ ]
403
 
404
+ for state in nigeria_states:
 
 
 
405
 
406
+ if state in cleaned_text:
407
 
408
+ matched_keywords.append(state)
409
 
410
+ confidence += 5
 
 
 
 
411
 
412
+ # Plate patterns
 
 
413
 
414
+ plate_patterns = [
415
+ r"[A-Z]{3}-?\d{3}[A-Z]{2}",
416
+ r"[A-Z]{2}\d{3}[A-Z]{3}",
417
+ r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"
418
+ ]
419
 
420
+ for pattern in plate_patterns:
 
421
 
422
+ plate_match = re.search(
423
+ pattern,
424
+ cleaned_text.upper()
425
+ )
426
 
427
+ if plate_match:
 
 
 
428
 
429
+ matched_keywords.append(
430
+ plate_match.group()
431
+ )
432
+
433
+ confidence += 50
434
 
435
  # =========================
436
  # LOW CONFIDENCE
437
  # =========================
438
 
439
+ if confidence <= 0:
 
 
 
 
 
440
 
441
+ return None
 
 
 
442
 
443
+ confidence = min(confidence, 99)
 
 
444
 
445
  return {
446
+ "document_type": document_type,
447
  "confidence": confidence,
448
+ "matched_keywords": matched_keywords
 
 
 
449
  }
450
 
 
451
  # =========================
452
  # HOME ROUTE
453
  # =========================
 
470
  ]
471
  }
472
 
 
473
  # =========================
474
  # VALIDATION ENDPOINT
475
  # =========================
 
481
 
482
  try:
483
 
484
+ # =========================
485
+ # VALID DOCUMENT TYPES
486
+ # =========================
487
+
488
+ valid_document_types = [
489
+
490
+ "National ID (NIN)",
491
+ "International Passport",
492
+ "Driver License",
493
+ "Voter Card",
494
+ "Vehicle with Plate Number",
495
+ "Utility Bill",
496
+ "Bank Statement",
497
+ "Tenancy Agreement"
498
+ ]
499
+
500
+ if request.document_type not in valid_document_types:
501
+
502
+ return {
503
+ "success": False,
504
+ "message": "Invalid document type",
505
+ "supported_document_types": (
506
+ valid_document_types
507
+ )
508
+ }
509
+
510
  # =========================
511
  # DOWNLOAD IMAGE
512
  # =========================
 
538
  "success": False,
539
  "message": "Invalid image",
540
  "reason": (
541
+ "The downloaded file "
542
+ "could not be read "
543
+ "as an image."
544
  ),
545
  "suggestion": (
546
  "Ensure the URL points "
 
558
  "success": False,
559
  "message": "Image rejected",
560
  "reason": (
561
+ "The uploaded image "
562
+ "is blurry."
563
  ),
564
  "suggestion": (
565
  "Retake the photo "
 
568
  }
569
 
570
  # =========================
571
+ # DARK CHECK
572
  # =========================
573
 
574
  if is_dark(image):
 
577
  "success": False,
578
  "message": "Image rejected",
579
  "reason": (
580
+ "The uploaded image "
581
+ "is too dark."
582
  ),
583
  "suggestion": (
584
  "Take the photo in a "
 
587
  }
588
 
589
  # =========================
590
+ # OCR EXTRACTION
591
  # =========================
592
 
593
  text = extract_text(image_path)
 
602
  "success": False,
603
  "message": "Document rejected",
604
  "reason": (
605
+ "No readable text "
606
+ "was detected "
607
+ "in the image."
608
  ),
609
  "suggestion": (
610
  "Ensure the document "
 
613
  }
614
 
615
  # =========================
616
+ # VALIDATE DOCUMENT
617
  # =========================
618
 
619
+ document_result = validate_document_type(
620
+ text,
621
+ request.document_type
622
  )
623
 
624
  # =========================
625
+ # DOCUMENT FAILED
626
  # =========================
627
 
628
  if document_result is None:
 
631
  "success": False,
632
  "message": "Document rejected",
633
  "reason": (
634
+ f"The uploaded image "
635
+ f"does not match "
636
+ f"the expected "
637
+ f"document type: "
638
+ f"{request.document_type}"
639
  ),
640
+ "ocr_preview": text[:300],
 
 
 
 
 
 
 
 
 
641
  "possible_issues": [
642
+ "Wrong document uploaded",
643
+ "Image is blurry",
644
  "Image is cropped",
 
 
645
  "Poor lighting",
646
+ "Text not readable",
 
647
  "Document partially hidden"
648
+ ]
 
649
  }
650
 
651
  # =========================
 
654
 
655
  return {
656
  "success": True,
657
+ "message": (
658
+ "Document verified successfully"
659
+ ),
660
  "document_type": (
661
  document_result["document_type"]
662
  ),
 
666
  "matched_keywords": (
667
  document_result["matched_keywords"]
668
  ),
 
 
 
669
  "ocr_preview": text[:300]
670
  }
671
 
 
684
  # =========================
685
 
686
  if os.path.exists("temp.jpg"):
687
+
688
  os.remove("temp.jpg")