Midnightar committed on
Commit
d86e67c
·
verified ·
1 Parent(s): 968cf82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -117
app.py CHANGED
@@ -1,5 +1,4 @@
1
- from fastapi import FastAPI
2
- from pydantic import BaseModel
3
  import easyocr
4
  import cv2
5
  import numpy as np
@@ -14,36 +13,18 @@ app = FastAPI()
14
 
15
  reader = easyocr.Reader(['en'])
16
 
17
- # =========================
18
- # REQUEST MODEL
19
- # =========================
20
-
21
- class ImageRequest(BaseModel):
22
- image_path: str
23
-
24
  # =========================
25
  # IMAGE QUALITY CHECKS
26
  # =========================
27
 
28
  def is_blurry(image):
29
-
30
- gray = cv2.cvtColor(
31
- image,
32
- cv2.COLOR_BGR2GRAY
33
- )
34
-
35
- variance = cv2.Laplacian(
36
- gray,
37
- cv2.CV_64F
38
- ).var()
39
-
40
  return variance < 100
41
 
42
 
43
  def is_dark(image):
44
-
45
  brightness = np.mean(image)
46
-
47
  return brightness < 50
48
 
49
 
@@ -52,13 +33,8 @@ def is_dark(image):
52
  # =========================
53
 
54
  def extract_text(image_path):
55
-
56
  results = reader.readtext(image_path)
57
-
58
- text = " ".join(
59
- [r[1] for r in results]
60
- ).lower()
61
-
62
  return text
63
 
64
 
@@ -72,11 +48,7 @@ def detect_document(text):
72
  text = text.lower().strip()
73
 
74
  # REMOVE EXTRA SYMBOLS
75
- cleaned_text = re.sub(
76
- r'[^a-zA-Z0-9\s-]',
77
- ' ',
78
- text
79
- )
80
 
81
  # SPLIT WORDS
82
  words = cleaned_text.split()
@@ -90,11 +62,8 @@ def detect_document(text):
90
  ]
91
 
92
  for pattern in garbage_patterns:
93
-
94
  for word in words:
95
-
96
  if re.match(pattern, word):
97
-
98
  if len(words) <= 2:
99
  return {
100
  "document_type": "unknown",
@@ -120,12 +89,10 @@ def detect_document(text):
120
  matched_keywords = []
121
 
122
  for keyword in nin_keywords:
123
-
124
  if keyword in cleaned_text:
125
  matched_keywords.append(keyword)
126
 
127
  if len(matched_keywords) > 0:
128
-
129
  return {
130
  "document_type": "nin",
131
  "confidence": 95,
@@ -145,12 +112,10 @@ def detect_document(text):
145
  matched_keywords = []
146
 
147
  for keyword in passport_keywords:
148
-
149
  if keyword in cleaned_text:
150
  matched_keywords.append(keyword)
151
 
152
  if len(matched_keywords) > 0:
153
-
154
  return {
155
  "document_type": "passport",
156
  "confidence": 94,
@@ -172,12 +137,10 @@ def detect_document(text):
172
  matched_keywords = []
173
 
174
  for keyword in license_keywords:
175
-
176
  if keyword in cleaned_text:
177
  matched_keywords.append(keyword)
178
 
179
  if len(matched_keywords) >= 2:
180
-
181
  return {
182
  "document_type": "drivers_license",
183
  "confidence": 92,
@@ -198,12 +161,10 @@ def detect_document(text):
198
  matched_keywords = []
199
 
200
  for keyword in voter_keywords:
201
-
202
  if keyword in cleaned_text:
203
  matched_keywords.append(keyword)
204
 
205
  if len(matched_keywords) > 0:
206
-
207
  return {
208
  "document_type": "voters_card",
209
  "confidence": 90,
@@ -211,7 +172,7 @@ def detect_document(text):
211
  }
212
 
213
  # =========================
214
- # ELECTRICITY / UTILITY BILL
215
  # =========================
216
 
217
  electricity_keywords = [
@@ -246,7 +207,7 @@ def detect_document(text):
246
  "yedc",
247
  "yola electricity",
248
 
249
- # Common terms
250
  "prepaid",
251
  "postpaid",
252
  "disco",
@@ -257,12 +218,10 @@ def detect_document(text):
257
  matched_keywords = []
258
 
259
  for keyword in electricity_keywords:
260
-
261
  if keyword in cleaned_text:
262
  matched_keywords.append(keyword)
263
 
264
  if len(matched_keywords) > 0:
265
-
266
  return {
267
  "document_type": "utility_bill",
268
  "confidence": 90,
@@ -274,7 +233,6 @@ def detect_document(text):
274
  # =========================
275
 
276
  bank_keywords = [
277
-
278
  "account statement",
279
  "statement of account",
280
  "transaction",
@@ -301,12 +259,10 @@ def detect_document(text):
301
  matched_keywords = []
302
 
303
  for keyword in bank_keywords:
304
-
305
  if keyword in cleaned_text:
306
  matched_keywords.append(keyword)
307
 
308
  if len(matched_keywords) > 0:
309
-
310
  return {
311
  "document_type": "bank_statement",
312
  "confidence": 91,
@@ -318,7 +274,6 @@ def detect_document(text):
318
  # =========================
319
 
320
  tenancy_keywords = [
321
-
322
  "tenancy agreement",
323
  "landlord",
324
  "tenant",
@@ -331,12 +286,10 @@ def detect_document(text):
331
  matched_keywords = []
332
 
333
  for keyword in tenancy_keywords:
334
-
335
  if keyword in cleaned_text:
336
  matched_keywords.append(keyword)
337
 
338
  if len(matched_keywords) > 0:
339
-
340
  return {
341
  "document_type": "tenancy_agreement",
342
  "confidence": 89,
@@ -348,7 +301,6 @@ def detect_document(text):
348
  # =========================
349
 
350
  vehicle_keywords = [
351
-
352
  "toyota",
353
  "honda",
354
  "lexus",
@@ -371,7 +323,6 @@ def detect_document(text):
371
  matched_keywords = []
372
 
373
  for keyword in vehicle_keywords:
374
-
375
  if keyword in cleaned_text:
376
  matched_keywords.append(keyword)
377
 
@@ -380,7 +331,6 @@ def detect_document(text):
380
  # =========================
381
 
382
  nigeria_states = [
383
-
384
  "lagos",
385
  "abuja",
386
  "kano",
@@ -422,7 +372,6 @@ def detect_document(text):
422
  state_matches = []
423
 
424
  for state in nigeria_states:
425
-
426
  if state in cleaned_text:
427
  state_matches.append(state)
428
 
@@ -431,7 +380,6 @@ def detect_document(text):
431
  # =========================
432
 
433
  plate_patterns = [
434
-
435
  r"[A-Z]{3}-?\d{3}[A-Z]{2}",
436
  r"[A-Z]{2}\d{3}[A-Z]{3}",
437
  r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"
@@ -440,16 +388,10 @@ def detect_document(text):
440
  detected_plate = None
441
 
442
  for pattern in plate_patterns:
443
-
444
- plate_match = re.search(
445
- pattern,
446
- cleaned_text.upper()
447
- )
448
 
449
  if plate_match:
450
-
451
  detected_plate = plate_match.group()
452
-
453
  break
454
 
455
  # =========================
@@ -457,18 +399,14 @@ def detect_document(text):
457
  # =========================
458
 
459
  if detected_plate:
460
-
461
  return {
462
  "document_type": "vehicle_plate",
463
  "confidence": 97,
464
- "matched_keywords": [
465
- detected_plate
466
- ] + state_matches
467
  }
468
 
469
  # VEHICLE WITHOUT CLEAR PLATE
470
  if len(matched_keywords) > 0:
471
-
472
  return {
473
  "document_type": "vehicle_image",
474
  "confidence": 75,
@@ -488,7 +426,6 @@ def detect_document(text):
488
 
489
  @app.get("/")
490
  def home():
491
-
492
  return {
493
  "success": True,
494
  "message": "Document Validation API Running",
@@ -510,28 +447,18 @@ def home():
510
  # =========================
511
 
512
  @app.post("/validate")
513
- async def validate_document(
514
- request: ImageRequest
515
- ):
516
 
517
  try:
518
 
519
- image_path = request.image_path
520
-
521
  # =========================
522
- # CHECK FILE EXISTS
523
  # =========================
524
 
525
- if not os.path.exists(image_path):
526
 
527
- return {
528
- "success": False,
529
- "message": "Image not found",
530
- "reason": (
531
- "The provided image path "
532
- "does not exist."
533
- )
534
- }
535
 
536
  # =========================
537
  # READ IMAGE
@@ -540,16 +467,15 @@ async def validate_document(
540
  image = cv2.imread(image_path)
541
 
542
  if image is None:
543
-
544
  return {
545
  "success": False,
546
  "message": "Invalid image",
547
  "reason": (
548
- "The file could not be "
549
- "read as an image."
550
  ),
551
  "suggestion": (
552
- "Provide a valid JPG or PNG image."
553
  )
554
  }
555
 
@@ -558,14 +484,12 @@ async def validate_document(
558
  # =========================
559
 
560
  if is_blurry(image):
561
-
562
  return {
563
  "success": False,
564
  "message": "Image rejected",
565
- "reason": "The image is blurry.",
566
  "suggestion": (
567
- "Retake the photo with "
568
- "better focus."
569
  )
570
  }
571
 
@@ -574,13 +498,12 @@ async def validate_document(
574
  # =========================
575
 
576
  if is_dark(image):
577
-
578
  return {
579
  "success": False,
580
  "message": "Image rejected",
581
- "reason": "The image is too dark.",
582
  "suggestion": (
583
- "Use better lighting."
584
  )
585
  }
586
 
@@ -595,16 +518,16 @@ async def validate_document(
595
  # =========================
596
 
597
  if len(text.strip()) == 0:
598
-
599
  return {
600
  "success": False,
601
  "message": "Document rejected",
602
  "reason": (
603
- "No readable text was detected."
 
604
  ),
605
  "suggestion": (
606
- "Ensure the document is "
607
- "clear and fully visible."
608
  )
609
  }
610
 
@@ -619,13 +542,12 @@ async def validate_document(
619
  # =========================
620
 
621
  if document_result is None:
622
-
623
  return {
624
  "success": False,
625
  "message": "Document rejected",
626
  "reason": (
627
- "The uploaded image does not "
628
- "match any supported document type."
629
  ),
630
  "supported_documents": [
631
  "National ID (NIN)",
@@ -676,17 +598,11 @@ async def validate_document(
676
  "reason": str(e)
677
  }
678
 
 
679
 
680
- # =========================
681
- # RUN SERVER
682
- # =========================
683
-
684
- if __name__ == "__main__":
685
-
686
- import uvicorn
687
 
688
- uvicorn.run(
689
- app,
690
- host="0.0.0.0",
691
- port=7860
692
- )
 
1
+ from fastapi import FastAPI, UploadFile, File
 
2
  import easyocr
3
  import cv2
4
  import numpy as np
 
13
 
14
  reader = easyocr.Reader(['en'])
15
 
 
 
 
 
 
 
 
16
  # =========================
17
  # IMAGE QUALITY CHECKS
18
  # =========================
19
 
20
  def is_blurry(image):
21
+ gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
22
+ variance = cv2.Laplacian(gray, cv2.CV_64F).var()
 
 
 
 
 
 
 
 
 
23
  return variance < 100
24
 
25
 
26
  def is_dark(image):
 
27
  brightness = np.mean(image)
 
28
  return brightness < 50
29
 
30
 
 
33
  # =========================
34
 
35
  def extract_text(image_path):
 
36
  results = reader.readtext(image_path)
37
+ text = " ".join([r[1] for r in results]).lower()
 
 
 
 
38
  return text
39
 
40
 
 
48
  text = text.lower().strip()
49
 
50
  # REMOVE EXTRA SYMBOLS
51
+ cleaned_text = re.sub(r'[^a-zA-Z0-9\s-]', ' ', text)
 
 
 
 
52
 
53
  # SPLIT WORDS
54
  words = cleaned_text.split()
 
62
  ]
63
 
64
  for pattern in garbage_patterns:
 
65
  for word in words:
 
66
  if re.match(pattern, word):
 
67
  if len(words) <= 2:
68
  return {
69
  "document_type": "unknown",
 
89
  matched_keywords = []
90
 
91
  for keyword in nin_keywords:
 
92
  if keyword in cleaned_text:
93
  matched_keywords.append(keyword)
94
 
95
  if len(matched_keywords) > 0:
 
96
  return {
97
  "document_type": "nin",
98
  "confidence": 95,
 
112
  matched_keywords = []
113
 
114
  for keyword in passport_keywords:
 
115
  if keyword in cleaned_text:
116
  matched_keywords.append(keyword)
117
 
118
  if len(matched_keywords) > 0:
 
119
  return {
120
  "document_type": "passport",
121
  "confidence": 94,
 
137
  matched_keywords = []
138
 
139
  for keyword in license_keywords:
 
140
  if keyword in cleaned_text:
141
  matched_keywords.append(keyword)
142
 
143
  if len(matched_keywords) >= 2:
 
144
  return {
145
  "document_type": "drivers_license",
146
  "confidence": 92,
 
161
  matched_keywords = []
162
 
163
  for keyword in voter_keywords:
 
164
  if keyword in cleaned_text:
165
  matched_keywords.append(keyword)
166
 
167
  if len(matched_keywords) > 0:
 
168
  return {
169
  "document_type": "voters_card",
170
  "confidence": 90,
 
172
  }
173
 
174
  # =========================
175
+ # ELECTRICITY COMPANIES
176
  # =========================
177
 
178
  electricity_keywords = [
 
207
  "yedc",
208
  "yola electricity",
209
 
210
+ # Common Nigerian utility terms
211
  "prepaid",
212
  "postpaid",
213
  "disco",
 
218
  matched_keywords = []
219
 
220
  for keyword in electricity_keywords:
 
221
  if keyword in cleaned_text:
222
  matched_keywords.append(keyword)
223
 
224
  if len(matched_keywords) > 0:
 
225
  return {
226
  "document_type": "utility_bill",
227
  "confidence": 90,
 
233
  # =========================
234
 
235
  bank_keywords = [
 
236
  "account statement",
237
  "statement of account",
238
  "transaction",
 
259
  matched_keywords = []
260
 
261
  for keyword in bank_keywords:
 
262
  if keyword in cleaned_text:
263
  matched_keywords.append(keyword)
264
 
265
  if len(matched_keywords) > 0:
 
266
  return {
267
  "document_type": "bank_statement",
268
  "confidence": 91,
 
274
  # =========================
275
 
276
  tenancy_keywords = [
 
277
  "tenancy agreement",
278
  "landlord",
279
  "tenant",
 
286
  matched_keywords = []
287
 
288
  for keyword in tenancy_keywords:
 
289
  if keyword in cleaned_text:
290
  matched_keywords.append(keyword)
291
 
292
  if len(matched_keywords) > 0:
 
293
  return {
294
  "document_type": "tenancy_agreement",
295
  "confidence": 89,
 
301
  # =========================
302
 
303
  vehicle_keywords = [
 
304
  "toyota",
305
  "honda",
306
  "lexus",
 
323
  matched_keywords = []
324
 
325
  for keyword in vehicle_keywords:
 
326
  if keyword in cleaned_text:
327
  matched_keywords.append(keyword)
328
 
 
331
  # =========================
332
 
333
  nigeria_states = [
 
334
  "lagos",
335
  "abuja",
336
  "kano",
 
372
  state_matches = []
373
 
374
  for state in nigeria_states:
 
375
  if state in cleaned_text:
376
  state_matches.append(state)
377
 
 
380
  # =========================
381
 
382
  plate_patterns = [
 
383
  r"[A-Z]{3}-?\d{3}[A-Z]{2}",
384
  r"[A-Z]{2}\d{3}[A-Z]{3}",
385
  r"[A-Z]{3}\s\d{3}\s[A-Z]{2}"
 
388
  detected_plate = None
389
 
390
  for pattern in plate_patterns:
391
+ plate_match = re.search(pattern, cleaned_text.upper())
 
 
 
 
392
 
393
  if plate_match:
 
394
  detected_plate = plate_match.group()
 
395
  break
396
 
397
  # =========================
 
399
  # =========================
400
 
401
  if detected_plate:
 
402
  return {
403
  "document_type": "vehicle_plate",
404
  "confidence": 97,
405
+ "matched_keywords": [detected_plate] + state_matches
 
 
406
  }
407
 
408
  # VEHICLE WITHOUT CLEAR PLATE
409
  if len(matched_keywords) > 0:
 
410
  return {
411
  "document_type": "vehicle_image",
412
  "confidence": 75,
 
426
 
427
  @app.get("/")
428
  def home():
 
429
  return {
430
  "success": True,
431
  "message": "Document Validation API Running",
 
447
  # =========================
448
 
449
  @app.post("/validate")
450
+ async def validate_document(file: UploadFile = File(...)):
 
 
451
 
452
  try:
453
 
 
 
454
  # =========================
455
+ # SAVE FILE
456
  # =========================
457
 
458
+ image_path = "temp.jpg"
459
 
460
+ with open(image_path, "wb") as f:
461
+ f.write(await file.read())
 
 
 
 
 
 
462
 
463
  # =========================
464
  # READ IMAGE
 
467
  image = cv2.imread(image_path)
468
 
469
  if image is None:
 
470
  return {
471
  "success": False,
472
  "message": "Invalid image",
473
  "reason": (
474
+ "The uploaded file could not "
475
+ "be read as an image."
476
  ),
477
  "suggestion": (
478
+ "Upload a valid JPG or PNG image."
479
  )
480
  }
481
 
 
484
  # =========================
485
 
486
  if is_blurry(image):
 
487
  return {
488
  "success": False,
489
  "message": "Image rejected",
490
+ "reason": "The uploaded image is blurry.",
491
  "suggestion": (
492
+ "Retake the photo with better focus."
 
493
  )
494
  }
495
 
 
498
  # =========================
499
 
500
  if is_dark(image):
 
501
  return {
502
  "success": False,
503
  "message": "Image rejected",
504
+ "reason": "The uploaded image is too dark.",
505
  "suggestion": (
506
+ "Take the photo in a brighter environment."
507
  )
508
  }
509
 
 
518
  # =========================
519
 
520
  if len(text.strip()) == 0:
 
521
  return {
522
  "success": False,
523
  "message": "Document rejected",
524
  "reason": (
525
+ "No readable text was detected "
526
+ "in the image."
527
  ),
528
  "suggestion": (
529
+ "Ensure the document is clear "
530
+ "and fully visible."
531
  )
532
  }
533
 
 
542
  # =========================
543
 
544
  if document_result is None:
 
545
  return {
546
  "success": False,
547
  "message": "Document rejected",
548
  "reason": (
549
+ "The uploaded image does not match "
550
+ "any supported document type."
551
  ),
552
  "supported_documents": [
553
  "National ID (NIN)",
 
598
  "reason": str(e)
599
  }
600
 
601
+ finally:
602
 
603
+ # =========================
604
+ # CLEAN TEMP FILE
605
+ # =========================
 
 
 
 
606
 
607
+ if os.path.exists("temp.jpg"):
608
+ os.remove("temp.jpg")