Midnightar committed on
Commit
0fc2bf7
·
verified ·
1 Parent(s): d86e67c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +262 -139
app.py CHANGED
@@ -1,9 +1,11 @@
1
- from fastapi import FastAPI, UploadFile, File
 
2
  import easyocr
3
  import cv2
4
  import numpy as np
5
  import re
6
  import os
 
7
 
8
  app = FastAPI()
9
 
@@ -13,18 +15,62 @@ app = FastAPI()
13
 
14
  reader = easyocr.Reader(['en'])
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # =========================
17
  # IMAGE QUALITY CHECKS
18
  # =========================
19
 
20
  def is_blurry(image):
21
- gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
22
- variance = cv2.Laplacian(gray, cv2.CV_64F).var()
 
 
 
 
 
 
 
 
 
23
  return variance < 100
24
 
25
 
26
  def is_dark(image):
 
27
  brightness = np.mean(image)
 
28
  return brightness < 50
29
 
30
 
@@ -33,8 +79,13 @@ def is_dark(image):
33
  # =========================
34
 
35
  def extract_text(image_path):
 
36
  results = reader.readtext(image_path)
37
- text = " ".join([r[1] for r in results]).lower()
 
 
 
 
38
  return text
39
 
40
 
@@ -44,36 +95,64 @@ def extract_text(image_path):
44
 
45
  def detect_document(text):
46
 
 
47
  # CLEAN TEXT
 
 
48
  text = text.lower().strip()
49
 
50
- # REMOVE EXTRA SYMBOLS
51
- cleaned_text = re.sub(r'[^a-zA-Z0-9\s-]', ' ', text)
 
 
 
52
 
53
- # SPLIT WORDS
54
  words = cleaned_text.split()
55
 
56
  # =========================
57
- # REJECT RANDOM OCR GARBAGE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  # =========================
59
 
60
  garbage_patterns = [
61
  r'^[a-z0-9]{4,8}$'
62
  ]
63
 
 
 
64
  for pattern in garbage_patterns:
 
65
  for word in words:
 
66
  if re.match(pattern, word):
67
- if len(words) <= 2:
68
- return {
69
- "document_type": "unknown",
70
- "confidence": 5,
71
- "matched_keywords": [word],
72
- "reason": (
73
- "OCR detected unreadable or "
74
- "meaningless text."
75
- )
76
- }
77
 
78
  # =========================
79
  # NIN
@@ -82,22 +161,21 @@ def detect_document(text):
82
  nin_keywords = [
83
  "national identification number",
84
  "national identity",
85
- "nin",
86
  "nimc"
87
  ]
88
 
89
- matched_keywords = []
90
-
91
  for keyword in nin_keywords:
 
92
  if keyword in cleaned_text:
93
- matched_keywords.append(keyword)
94
 
95
- if len(matched_keywords) > 0:
96
- return {
97
- "document_type": "nin",
98
- "confidence": 95,
99
- "matched_keywords": matched_keywords
100
- }
 
 
101
 
102
  # =========================
103
  # PASSPORT
@@ -106,21 +184,16 @@ def detect_document(text):
106
  passport_keywords = [
107
  "passport",
108
  "federal republic of nigeria",
109
- "nigeria passport"
 
110
  ]
111
 
112
- matched_keywords = []
113
-
114
  for keyword in passport_keywords:
 
115
  if keyword in cleaned_text:
116
- matched_keywords.append(keyword)
117
 
118
- if len(matched_keywords) > 0:
119
- return {
120
- "document_type": "passport",
121
- "confidence": 94,
122
- "matched_keywords": matched_keywords
123
- }
124
 
125
  # =========================
126
  # DRIVER LICENSE
@@ -134,18 +207,12 @@ def detect_document(text):
134
  "frsc"
135
  ]
136
 
137
- matched_keywords = []
138
-
139
  for keyword in license_keywords:
 
140
  if keyword in cleaned_text:
141
- matched_keywords.append(keyword)
142
 
143
- if len(matched_keywords) >= 2:
144
- return {
145
- "document_type": "drivers_license",
146
- "confidence": 92,
147
- "matched_keywords": matched_keywords
148
- }
149
 
150
  # =========================
151
  # VOTER CARD
@@ -158,21 +225,15 @@ def detect_document(text):
158
  "polling unit"
159
  ]
160
 
161
- matched_keywords = []
162
-
163
  for keyword in voter_keywords:
 
164
  if keyword in cleaned_text:
165
- matched_keywords.append(keyword)
166
 
167
- if len(matched_keywords) > 0:
168
- return {
169
- "document_type": "voters_card",
170
- "confidence": 90,
171
- "matched_keywords": matched_keywords
172
- }
173
 
174
  # =========================
175
- # ELECTRICITY COMPANIES
176
  # =========================
177
 
178
  electricity_keywords = [
@@ -182,57 +243,62 @@ def detect_document(text):
182
  "electric bill",
183
  "power bill",
184
  "meter number",
 
 
 
 
 
 
 
185
 
186
  # Nigerian DISCOs
187
  "ibedc",
188
  "ibadan electricity",
 
189
  "ikedc",
190
  "ikeja electric",
 
191
  "ekedc",
192
  "eko electric",
 
193
  "aedc",
194
  "abuja electricity",
 
195
  "eedc",
196
  "enugu electricity",
 
197
  "bedc",
198
  "benin electricity",
 
199
  "jed",
200
  "jos electricity",
 
201
  "kedco",
202
  "kano electricity",
 
203
  "kaedco",
204
  "kaduna electric",
 
205
  "phed",
206
  "port harcourt electricity",
207
- "yedc",
208
- "yola electricity",
209
 
210
- # Common Nigerian utility terms
211
- "prepaid",
212
- "postpaid",
213
- "disco",
214
- "energy charge",
215
- "tariff"
216
  ]
217
 
218
- matched_keywords = []
219
-
220
  for keyword in electricity_keywords:
 
221
  if keyword in cleaned_text:
222
- matched_keywords.append(keyword)
223
 
224
- if len(matched_keywords) > 0:
225
- return {
226
- "document_type": "utility_bill",
227
- "confidence": 90,
228
- "matched_keywords": matched_keywords
229
- }
230
 
231
  # =========================
232
  # BANK STATEMENT
233
  # =========================
234
 
235
  bank_keywords = [
 
236
  "account statement",
237
  "statement of account",
238
  "transaction",
@@ -253,21 +319,19 @@ def detect_document(text):
253
  "moniepoint",
254
  "kuda",
255
  "fcmb",
256
- "sterling bank"
 
 
 
 
257
  ]
258
 
259
- matched_keywords = []
260
-
261
  for keyword in bank_keywords:
 
262
  if keyword in cleaned_text:
263
- matched_keywords.append(keyword)
264
 
265
- if len(matched_keywords) > 0:
266
- return {
267
- "document_type": "bank_statement",
268
- "confidence": 91,
269
- "matched_keywords": matched_keywords
270
- }
271
 
272
  # =========================
273
  # TENANCY AGREEMENT
@@ -283,18 +347,12 @@ def detect_document(text):
283
  "rental agreement"
284
  ]
285
 
286
- matched_keywords = []
287
-
288
  for keyword in tenancy_keywords:
 
289
  if keyword in cleaned_text:
290
- matched_keywords.append(keyword)
291
 
292
- if len(matched_keywords) > 0:
293
- return {
294
- "document_type": "tenancy_agreement",
295
- "confidence": 89,
296
- "matched_keywords": matched_keywords
297
- }
298
 
299
  # =========================
300
  # VEHICLE KEYWORDS
@@ -320,11 +378,12 @@ def detect_document(text):
320
  "plate number"
321
  ]
322
 
323
- matched_keywords = []
324
-
325
  for keyword in vehicle_keywords:
 
326
  if keyword in cleaned_text:
327
- matched_keywords.append(keyword)
 
 
328
 
329
  # =========================
330
  # NIGERIAN STATES
@@ -369,11 +428,12 @@ def detect_document(text):
369
  "ebonyi"
370
  ]
371
 
372
- state_matches = []
373
-
374
  for state in nigeria_states:
 
375
  if state in cleaned_text:
376
- state_matches.append(state)
 
 
377
 
378
  # =========================
379
  # NIGERIAN PLATE PATTERNS
@@ -385,39 +445,71 @@ def detect_document(text):
385
  r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"
386
  ]
387
 
388
- detected_plate = None
389
-
390
  for pattern in plate_patterns:
391
- plate_match = re.search(pattern, cleaned_text.upper())
 
 
 
 
392
 
393
  if plate_match:
394
- detected_plate = plate_match.group()
395
- break
 
 
 
 
 
 
396
 
397
  # =========================
398
- # VEHICLE DETECTION
399
  # =========================
400
 
401
- if detected_plate:
402
- return {
403
- "document_type": "vehicle_plate",
404
- "confidence": 97,
405
- "matched_keywords": [detected_plate] + state_matches
406
- }
407
 
408
- # VEHICLE WITHOUT CLEAR PLATE
409
- if len(matched_keywords) > 0:
410
- return {
411
- "document_type": "vehicle_image",
412
- "confidence": 75,
413
- "matched_keywords": matched_keywords
414
- }
415
 
416
  # =========================
417
- # UNKNOWN DOCUMENT
418
  # =========================
419
 
420
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
 
423
  # =========================
@@ -426,6 +518,7 @@ def detect_document(text):
426
 
427
  @app.get("/")
428
  def home():
 
429
  return {
430
  "success": True,
431
  "message": "Document Validation API Running",
@@ -447,18 +540,30 @@ def home():
447
  # =========================
448
 
449
  @app.post("/validate")
450
- async def validate_document(file: UploadFile = File(...)):
 
 
451
 
452
  try:
453
 
454
  # =========================
455
- # SAVE FILE
456
  # =========================
457
 
458
- image_path = "temp.jpg"
 
 
459
 
460
- with open(image_path, "wb") as f:
461
- f.write(await file.read())
 
 
 
 
 
 
 
 
462
 
463
  # =========================
464
  # READ IMAGE
@@ -467,15 +572,17 @@ async def validate_document(file: UploadFile = File(...)):
467
  image = cv2.imread(image_path)
468
 
469
  if image is None:
 
470
  return {
471
  "success": False,
472
  "message": "Invalid image",
473
  "reason": (
474
- "The uploaded file could not "
475
- "be read as an image."
476
  ),
477
  "suggestion": (
478
- "Upload a valid JPG or PNG image."
 
479
  )
480
  }
481
 
@@ -484,12 +591,16 @@ async def validate_document(file: UploadFile = File(...)):
484
  # =========================
485
 
486
  if is_blurry(image):
 
487
  return {
488
  "success": False,
489
  "message": "Image rejected",
490
- "reason": "The uploaded image is blurry.",
 
 
491
  "suggestion": (
492
- "Retake the photo with better focus."
 
493
  )
494
  }
495
 
@@ -498,12 +609,16 @@ async def validate_document(file: UploadFile = File(...)):
498
  # =========================
499
 
500
  if is_dark(image):
 
501
  return {
502
  "success": False,
503
  "message": "Image rejected",
504
- "reason": "The uploaded image is too dark.",
 
 
505
  "suggestion": (
506
- "Take the photo in a brighter environment."
 
507
  )
508
  }
509
 
@@ -518,16 +633,17 @@ async def validate_document(file: UploadFile = File(...)):
518
  # =========================
519
 
520
  if len(text.strip()) == 0:
 
521
  return {
522
  "success": False,
523
  "message": "Document rejected",
524
  "reason": (
525
- "No readable text was detected "
526
- "in the image."
527
  ),
528
  "suggestion": (
529
- "Ensure the document is clear "
530
- "and fully visible."
531
  )
532
  }
533
 
@@ -535,19 +651,23 @@ async def validate_document(file: UploadFile = File(...)):
535
  # DOCUMENT DETECTION
536
  # =========================
537
 
538
- document_result = detect_document(text)
 
 
539
 
540
  # =========================
541
  # UNSUPPORTED DOCUMENT
542
  # =========================
543
 
544
  if document_result is None:
 
545
  return {
546
  "success": False,
547
  "message": "Document rejected",
548
  "reason": (
549
- "The uploaded image does not match "
550
- "any supported document type."
 
551
  ),
552
  "supported_documents": [
553
  "National ID (NIN)",
@@ -587,6 +707,9 @@ async def validate_document(file: UploadFile = File(...)):
587
  "matched_keywords": (
588
  document_result["matched_keywords"]
589
  ),
 
 
 
590
  "ocr_preview": text[:300]
591
  }
592
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
  import easyocr
4
  import cv2
5
  import numpy as np
6
  import re
7
  import os
8
+ import requests
9
 
10
  app = FastAPI()
11
 
 
15
 
16
  reader = easyocr.Reader(['en'])
17
 
18
+ # =========================
19
+ # REQUEST MODEL
20
+ # =========================
21
+
22
# Request body model with a single field: the address of the image,
# declared as a plain string.
class ImageRequest(BaseModel):
    image_url: str
24
+
25
+ # =========================
26
+ # DOWNLOAD IMAGE
27
+ # =========================
28
+
29
def download_image(url):
    """Download the image at *url* and save it to a local file.

    Args:
        url: Address of the image to fetch.

    Returns:
        The path of the saved file ("temp.jpg"), or None when the request
        fails, the server answers with a status other than 200, or the
        file cannot be written.
    """
    # NOTE(review): every call writes to the same fixed path, so two
    # downloads running at the same time would overwrite each other.
    image_path = "temp.jpg"

    # NOTE(review): if url is supplied by an API client, this can be
    # pointed at internal hosts (SSRF) - consider validating or
    # allow-listing the target before fetching.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        # Covers invalid URLs, connection errors and timeouts, without
        # swallowing KeyboardInterrupt/SystemExit like a bare except would.
        return None

    if response.status_code != 200:
        return None

    try:
        with open(image_path, "wb") as f:
            f.write(response.content)
    except OSError:
        # Disk or permission problem while saving the download.
        return None

    return image_path
50
+
51
  # =========================
52
  # IMAGE QUALITY CHECKS
53
  # =========================
54
 
55
  def is_blurry(image):
56
+
57
+ gray = cv2.cvtColor(
58
+ image,
59
+ cv2.COLOR_BGR2GRAY
60
+ )
61
+
62
+ variance = cv2.Laplacian(
63
+ gray,
64
+ cv2.CV_64F
65
+ ).var()
66
+
67
  return variance < 100
68
 
69
 
70
  def is_dark(image):
71
+
72
  brightness = np.mean(image)
73
+
74
  return brightness < 50
75
 
76
 
 
79
  # =========================
80
 
81
  def extract_text(image_path):
82
+
83
  results = reader.readtext(image_path)
84
+
85
+ text = " ".join(
86
+ [r[1] for r in results]
87
+ ).lower()
88
+
89
  return text
90
 
91
 
 
95
 
96
  def detect_document(text):
97
 
98
+ # =========================
99
  # CLEAN TEXT
100
+ # =========================
101
+
102
  text = text.lower().strip()
103
 
104
+ cleaned_text = re.sub(
105
+ r'[^a-zA-Z0-9\s-]',
106
+ ' ',
107
+ text
108
+ )
109
 
 
110
  words = cleaned_text.split()
111
 
112
  # =========================
113
+ # SCORE SYSTEM
114
+ # =========================
115
+
116
+ scores = {
117
+ "nin": 0,
118
+ "passport": 0,
119
+ "drivers_license": 0,
120
+ "voters_card": 0,
121
+ "utility_bill": 0,
122
+ "bank_statement": 0,
123
+ "tenancy_agreement": 0,
124
+ "vehicle_plate": 0,
125
+ "vehicle_image": 0
126
+ }
127
+
128
+ matched_keywords = {
129
+ "nin": [],
130
+ "passport": [],
131
+ "drivers_license": [],
132
+ "voters_card": [],
133
+ "utility_bill": [],
134
+ "bank_statement": [],
135
+ "tenancy_agreement": [],
136
+ "vehicle_plate": [],
137
+ "vehicle_image": []
138
+ }
139
+
140
+ # =========================
141
+ # GARBAGE OCR DETECTION
142
  # =========================
143
 
144
  garbage_patterns = [
145
  r'^[a-z0-9]{4,8}$'
146
  ]
147
 
148
+ garbage_count = 0
149
+
150
  for pattern in garbage_patterns:
151
+
152
  for word in words:
153
+
154
  if re.match(pattern, word):
155
+ garbage_count += 1
 
 
 
 
 
 
 
 
 
156
 
157
  # =========================
158
  # NIN
 
161
  nin_keywords = [
162
  "national identification number",
163
  "national identity",
 
164
  "nimc"
165
  ]
166
 
 
 
167
  for keyword in nin_keywords:
168
+
169
  if keyword in cleaned_text:
 
170
 
171
+ scores["nin"] += 5
172
+ matched_keywords["nin"].append(keyword)
173
+
174
+ # Weak standalone nin
175
+ if " nin " in f" {cleaned_text} ":
176
+
177
+ scores["nin"] += 1
178
+ matched_keywords["nin"].append("nin")
179
 
180
  # =========================
181
  # PASSPORT
 
184
  passport_keywords = [
185
  "passport",
186
  "federal republic of nigeria",
187
+ "nigeria passport",
188
+ "international passport"
189
  ]
190
 
 
 
191
  for keyword in passport_keywords:
192
+
193
  if keyword in cleaned_text:
 
194
 
195
+ scores["passport"] += 5
196
+ matched_keywords["passport"].append(keyword)
 
 
 
 
197
 
198
  # =========================
199
  # DRIVER LICENSE
 
207
  "frsc"
208
  ]
209
 
 
 
210
  for keyword in license_keywords:
211
+
212
  if keyword in cleaned_text:
 
213
 
214
+ scores["drivers_license"] += 3
215
+ matched_keywords["drivers_license"].append(keyword)
 
 
 
 
216
 
217
  # =========================
218
  # VOTER CARD
 
225
  "polling unit"
226
  ]
227
 
 
 
228
  for keyword in voter_keywords:
229
+
230
  if keyword in cleaned_text:
 
231
 
232
+ scores["voters_card"] += 4
233
+ matched_keywords["voters_card"].append(keyword)
 
 
 
 
234
 
235
  # =========================
236
+ # UTILITY BILL
237
  # =========================
238
 
239
  electricity_keywords = [
 
243
  "electric bill",
244
  "power bill",
245
  "meter number",
246
+ "meter no",
247
+ "token",
248
+ "kwh",
249
+ "prepaid",
250
+ "postpaid",
251
+ "energy charge",
252
+ "tariff",
253
 
254
  # Nigerian DISCOs
255
  "ibedc",
256
  "ibadan electricity",
257
+
258
  "ikedc",
259
  "ikeja electric",
260
+
261
  "ekedc",
262
  "eko electric",
263
+
264
  "aedc",
265
  "abuja electricity",
266
+
267
  "eedc",
268
  "enugu electricity",
269
+
270
  "bedc",
271
  "benin electricity",
272
+
273
  "jed",
274
  "jos electricity",
275
+
276
  "kedco",
277
  "kano electricity",
278
+
279
  "kaedco",
280
  "kaduna electric",
281
+
282
  "phed",
283
  "port harcourt electricity",
 
 
284
 
285
+ "yedc",
286
+ "yola electricity"
 
 
 
 
287
  ]
288
 
 
 
289
  for keyword in electricity_keywords:
290
+
291
  if keyword in cleaned_text:
 
292
 
293
+ scores["utility_bill"] += 4
294
+ matched_keywords["utility_bill"].append(keyword)
 
 
 
 
295
 
296
  # =========================
297
  # BANK STATEMENT
298
  # =========================
299
 
300
  bank_keywords = [
301
+
302
  "account statement",
303
  "statement of account",
304
  "transaction",
 
319
  "moniepoint",
320
  "kuda",
321
  "fcmb",
322
+ "sterling bank",
323
+ "wema bank",
324
+ "providus",
325
+ "fidelity bank",
326
+ "union bank"
327
  ]
328
 
 
 
329
  for keyword in bank_keywords:
330
+
331
  if keyword in cleaned_text:
 
332
 
333
+ scores["bank_statement"] += 3
334
+ matched_keywords["bank_statement"].append(keyword)
 
 
 
 
335
 
336
  # =========================
337
  # TENANCY AGREEMENT
 
347
  "rental agreement"
348
  ]
349
 
 
 
350
  for keyword in tenancy_keywords:
351
+
352
  if keyword in cleaned_text:
 
353
 
354
+ scores["tenancy_agreement"] += 3
355
+ matched_keywords["tenancy_agreement"].append(keyword)
 
 
 
 
356
 
357
  # =========================
358
  # VEHICLE KEYWORDS
 
378
  "plate number"
379
  ]
380
 
 
 
381
  for keyword in vehicle_keywords:
382
+
383
  if keyword in cleaned_text:
384
+
385
+ scores["vehicle_image"] += 3
386
+ matched_keywords["vehicle_image"].append(keyword)
387
 
388
  # =========================
389
  # NIGERIAN STATES
 
428
  "ebonyi"
429
  ]
430
 
 
 
431
  for state in nigeria_states:
432
+
433
  if state in cleaned_text:
434
+
435
+ scores["vehicle_plate"] += 1
436
+ matched_keywords["vehicle_plate"].append(state)
437
 
438
  # =========================
439
  # NIGERIAN PLATE PATTERNS
 
445
  r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"
446
  ]
447
 
 
 
448
  for pattern in plate_patterns:
449
+
450
+ plate_match = re.search(
451
+ pattern,
452
+ cleaned_text.upper()
453
+ )
454
 
455
  if plate_match:
456
+
457
+ scores["vehicle_plate"] += 10
458
+
459
+ matched_keywords[
460
+ "vehicle_plate"
461
+ ].append(
462
+ plate_match.group()
463
+ )
464
 
465
  # =========================
466
+ # OCR GARBAGE PENALTY
467
  # =========================
468
 
469
+ if garbage_count >= 5:
 
 
 
 
 
470
 
471
+ for key in scores:
472
+ scores[key] -= 2
 
 
 
 
 
473
 
474
  # =========================
475
+ # BEST MATCH
476
  # =========================
477
 
478
+ best_doc = max(
479
+ scores,
480
+ key=scores.get
481
+ )
482
+
483
+ best_score = scores[best_doc]
484
+
485
+ # =========================
486
+ # LOW CONFIDENCE
487
+ # =========================
488
+
489
+ if best_score <= 0:
490
+ return None
491
+
492
+ # =========================
493
+ # CONFIDENCE
494
+ # =========================
495
+
496
+ confidence = min(
497
+ 99,
498
+ max(50, best_score * 5)
499
+ )
500
+
501
+ # =========================
502
+ # RETURN RESULT
503
+ # =========================
504
+
505
+ return {
506
+ "document_type": best_doc,
507
+ "confidence": confidence,
508
+ "matched_keywords": (
509
+ matched_keywords[best_doc]
510
+ ),
511
+ "all_scores": scores
512
+ }
513
 
514
 
515
  # =========================
 
518
 
519
  @app.get("/")
520
  def home():
521
+
522
  return {
523
  "success": True,
524
  "message": "Document Validation API Running",
 
540
  # =========================
541
 
542
  @app.post("/validate")
543
+ async def validate_document(
544
+ request: ImageRequest
545
+ ):
546
 
547
  try:
548
 
549
  # =========================
550
+ # DOWNLOAD IMAGE
551
  # =========================
552
 
553
+ image_path = download_image(
554
+ request.image_url
555
+ )
556
 
557
+ if image_path is None:
558
+
559
+ return {
560
+ "success": False,
561
+ "message": "Image download failed",
562
+ "reason": (
563
+ "Could not download image "
564
+ "from URL."
565
+ )
566
+ }
567
 
568
  # =========================
569
  # READ IMAGE
 
572
  image = cv2.imread(image_path)
573
 
574
  if image is None:
575
+
576
  return {
577
  "success": False,
578
  "message": "Invalid image",
579
  "reason": (
580
+ "The downloaded file could "
581
+ "not be read as an image."
582
  ),
583
  "suggestion": (
584
+ "Ensure the URL points "
585
+ "directly to an image."
586
  )
587
  }
588
 
 
591
  # =========================
592
 
593
  if is_blurry(image):
594
+
595
  return {
596
  "success": False,
597
  "message": "Image rejected",
598
+ "reason": (
599
+ "The uploaded image is blurry."
600
+ ),
601
  "suggestion": (
602
+ "Retake the photo "
603
+ "with better focus."
604
  )
605
  }
606
 
 
609
  # =========================
610
 
611
  if is_dark(image):
612
+
613
  return {
614
  "success": False,
615
  "message": "Image rejected",
616
+ "reason": (
617
+ "The uploaded image is too dark."
618
+ ),
619
  "suggestion": (
620
+ "Take the photo in a "
621
+ "brighter environment."
622
  )
623
  }
624
 
 
633
  # =========================
634
 
635
  if len(text.strip()) == 0:
636
+
637
  return {
638
  "success": False,
639
  "message": "Document rejected",
640
  "reason": (
641
+ "No readable text was "
642
+ "detected in the image."
643
  ),
644
  "suggestion": (
645
+ "Ensure the document "
646
+ "is clear and visible."
647
  )
648
  }
649
 
 
651
  # DOCUMENT DETECTION
652
  # =========================
653
 
654
+ document_result = detect_document(
655
+ text
656
+ )
657
 
658
  # =========================
659
  # UNSUPPORTED DOCUMENT
660
  # =========================
661
 
662
  if document_result is None:
663
+
664
  return {
665
  "success": False,
666
  "message": "Document rejected",
667
  "reason": (
668
+ "The uploaded image "
669
+ "does not match any "
670
+ "supported document type."
671
  ),
672
  "supported_documents": [
673
  "National ID (NIN)",
 
707
  "matched_keywords": (
708
  document_result["matched_keywords"]
709
  ),
710
+ "score_breakdown": (
711
+ document_result["all_scores"]
712
+ ),
713
  "ocr_preview": text[:300]
714
  }
715