ANISA09 committed on
Commit
39d17aa
·
verified ·
1 Parent(s): cb2f4b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -23
app.py CHANGED
@@ -1,48 +1,84 @@
1
  import gradio as gr
2
- from transformers import pipeline
 
 
 
3
  import pytesseract
4
  import cv2
5
- from PIL import Image
6
  import numpy as np
7
- import pytesseract
8
-
9
 
10
  # -------------------------------------------------------------
11
- # Load Model
12
  # -------------------------------------------------------------
13
- # Replace with your fine-tuned model on Hugging Face Hub
14
-
15
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
16
 
17
- MODEL_ID = "google/vit-base-patch16-224-in21k"
 
 
 
 
 
 
 
18
 
19
- classifier = pipeline("image-classification", model=MODEL_ID)
 
 
 
 
 
 
 
 
20
 
21
  # -------------------------------------------------------------
22
  # Certificate Verification Function
23
  # -------------------------------------------------------------
 
 
24
  def verify_certificate(image):
25
- # Convert to RGB if needed
26
  if not isinstance(image, Image.Image):
27
  image = Image.fromarray(image)
28
  image = image.convert("RGB")
29
 
30
- # 1️⃣ Model prediction
31
- preds = classifier(image)
32
- top_pred = preds[0]
33
- label = top_pred["label"]
34
- score = float(top_pred["score"])
 
 
 
 
 
35
 
36
- # 2️⃣ OCR text extraction (optional but helpful)
 
 
37
  img_np = np.array(image)
38
- img_cv = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
39
- gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
40
  text = pytesseract.image_to_string(gray)
41
 
42
- # 3️⃣ Combine result
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  result = {
44
- "prediction": label,
45
- "confidence": round(score, 4),
 
 
46
  "text_preview": text[:300]
47
  }
48
  return result
@@ -54,8 +90,8 @@ demo = gr.Interface(
54
  fn=verify_certificate,
55
  inputs=gr.Image(type="numpy", label="Upload Certificate Image"),
56
  outputs=gr.JSON(label="Verification Result"),
57
- title="Fake Certificate Verification API 🧠",
58
- description="Uploads a certificate image, runs an ML model to detect forgery, and extracts text for review.",
59
  )
60
 
61
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import AutoFeatureExtractor, AutoModelForImageClassification
3
+ import torch
4
+ from torchvision import transforms
5
+ from PIL import Image
6
  import pytesseract
7
  import cv2
 
8
  import numpy as np
 
 
9
 
10
  # -------------------------------------------------------------
11
+ # Setup OCR
12
  # -------------------------------------------------------------
 
 
13
  pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
14
 
15
+ # -------------------------------------------------------------
16
+ # Load Pretrained Vision Model
17
+ # -------------------------------------------------------------
18
+ # Using ResNet18 for demonstration
19
+ from torchvision.models import resnet18
20
+
21
# ResNet-18 loaded with the "IMAGENET1K_V1" weights and put into evaluation mode.
model = resnet18(weights="IMAGENET1K_V1")
model.eval()

# Input pipeline for the model: resize to 224x224, convert to a tensor, then
# normalise each channel with the mean/std values below.
_resize_step = transforms.Resize((224, 224))
_tensor_step = transforms.ToTensor()
_normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
preprocess = transforms.Compose([_resize_step, _tensor_step, _normalize_step])
33
 
34
  # -------------------------------------------------------------
35
  # Certificate Verification Function
36
  # -------------------------------------------------------------
37
+ REQUIRED_KEYWORDS = ["certificate", "proudly presented", "position", "organized by", "date"]
38
+
39
  def verify_certificate(image):
40
+ # Ensure PIL Image
41
  if not isinstance(image, Image.Image):
42
  image = Image.fromarray(image)
43
  image = image.convert("RGB")
44
 
45
+ # ------------------------------
46
+ # 1️⃣ Model Prediction (generic)
47
+ # ------------------------------
48
+ input_tensor = preprocess(image).unsqueeze(0) # add batch dim
49
+ with torch.no_grad():
50
+ outputs = model(input_tensor)
51
+ probs = torch.nn.functional.softmax(outputs[0], dim=0)
52
+ top_prob, top_catid = torch.topk(probs, 1)
53
+ model_confidence = float(top_prob.item())
54
+ model_label = str(top_catid.item()) # generic label index
55
 
56
+ # ------------------------------
57
+ # 2️⃣ OCR Extraction
58
+ # ------------------------------
59
  img_np = np.array(image)
60
+ gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
 
61
  text = pytesseract.image_to_string(gray)
62
 
63
+ # ------------------------------
64
+ # 3️⃣ Heuristic Text Scoring
65
+ # ------------------------------
66
+ keyword_matches = sum([1 for kw in REQUIRED_KEYWORDS if kw.lower() in text.lower()])
67
+ ocr_score = keyword_matches / len(REQUIRED_KEYWORDS)
68
+
69
+ # ------------------------------
70
+ # 4️⃣ Combine Model + OCR
71
+ # ------------------------------
72
+ combined_confidence = round((model_confidence + ocr_score) / 2, 4)
73
+
74
+ # ------------------------------
75
+ # 5️⃣ Return Result
76
+ # ------------------------------
77
  result = {
78
+ "model_label": model_label,
79
+ "model_confidence": round(model_confidence, 4),
80
+ "ocr_score": round(ocr_score, 4),
81
+ "combined_confidence": combined_confidence,
82
  "text_preview": text[:300]
83
  }
84
  return result
 
90
  fn=verify_certificate,
91
  inputs=gr.Image(type="numpy", label="Upload Certificate Image"),
92
  outputs=gr.JSON(label="Verification Result"),
93
+ title="Certificate Verification AI 🧠",
94
+ description="Uploads a certificate image, checks for authenticity using a vision model and OCR keyword heuristics."
95
  )
96
 
97
  if __name__ == "__main__":