Spaces:

ANISA09
/

ml

Sleeping

App Files Files Community

ANISA09 committed on Nov 3, 2025

Commit

39d17aa

verified ·

1 Parent(s): cb2f4b6

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -23

app.py CHANGED Viewed

@@ -1,48 +1,84 @@
 import gradio as gr
-from transformers import pipeline
 import pytesseract
 import cv2
-from PIL import Image
 import numpy as np
-import pytesseract
 # -------------------------------------------------------------
-#  Load Model
 # -------------------------------------------------------------
-# Replace with your fine-tuned model on Hugging Face Hub
 pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
-MODEL_ID = "google/vit-base-patch16-224-in21k"
-classifier = pipeline("image-classification", model=MODEL_ID)
 # -------------------------------------------------------------
 #  Certificate Verification Function
 # -------------------------------------------------------------
 def verify_certificate(image):
-    # Convert to RGB if needed
     if not isinstance(image, Image.Image):
         image = Image.fromarray(image)
     image = image.convert("RGB")
-    # 1️⃣ Model prediction
-    preds = classifier(image)
-    top_pred = preds[0]
-    label = top_pred["label"]
-    score = float(top_pred["score"])
-    # 2️⃣ OCR text extraction (optional but helpful)
     img_np = np.array(image)
-    img_cv = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
-    gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
     text = pytesseract.image_to_string(gray)
-    # 3️⃣ Combine result
     result = {
-        "prediction": label,
-        "confidence": round(score, 4),
         "text_preview": text[:300]
     }
     return result
@@ -54,8 +90,8 @@ demo = gr.Interface(
     fn=verify_certificate,
     inputs=gr.Image(type="numpy", label="Upload Certificate Image"),
     outputs=gr.JSON(label="Verification Result"),
-    title="Fake Certificate Verification API 🧠",
-    description="Uploads a certificate image, runs an ML model to detect forgery, and extracts text for review.",
 )
 if __name__ == "__main__":

 import gradio as gr
+from transformers import AutoFeatureExtractor, AutoModelForImageClassification
+import torch
+from torchvision import transforms
+from PIL import Image
 import pytesseract
 import cv2
 import numpy as np
 # -------------------------------------------------------------
+#  Setup OCR
 # -------------------------------------------------------------
 pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
+# -------------------------------------------------------------
+#  Load Pretrained Vision Model
+# -------------------------------------------------------------
+# Using ResNet18 for demonstration
+from torchvision.models import resnet18
+model = resnet18(weights="IMAGENET1K_V1")
+model.eval()
+# Define transform for the model
+preprocess = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(
+        mean=[0.485, 0.456, 0.406],
+        std=[0.229, 0.224, 0.225]
+    )
+])
 # -------------------------------------------------------------
 #  Certificate Verification Function
 # -------------------------------------------------------------
+REQUIRED_KEYWORDS = ["certificate", "proudly presented", "position", "organized by", "date"]
 def verify_certificate(image):
     """Score an uploaded certificate image with a vision model and OCR keywords.

     The vision model is the ImageNet-pretrained ResNet18 loaded at module
     level, so ``model_label`` is a raw class index and ``model_confidence``
     is the top-1 softmax probability of that generic classifier; it is not
     a forgery-specific measure.

     Args:
         image: A ``PIL.Image.Image`` or an array accepted by
             ``Image.fromarray`` (the Gradio input uses ``type="numpy"``).

     Returns:
         dict with keys ``model_label``, ``model_confidence``, ``ocr_score``
         (fraction of ``REQUIRED_KEYWORDS`` found in the OCR text),
         ``combined_confidence`` (mean of the two scores) and
         ``text_preview`` (first 300 characters of the OCR text).

     Raises:
         gr.Error: If no image was supplied.
     """
     # NOTE(review): a Gradio image input can deliver None when the form is
     # submitted empty; fail with a readable message instead of letting
     # Image.fromarray(None) raise an opaque error.
     if image is None:
         raise gr.Error("Please upload a certificate image before submitting.")
     # Ensure we are working with an RGB PIL image.
     if not isinstance(image, Image.Image):
         image = Image.fromarray(image)
     image = image.convert("RGB")
     # ------------------------------
     # 1. Model prediction (generic ImageNet classifier)
     # ------------------------------
     input_tensor = preprocess(image).unsqueeze(0)  # add batch dim
     with torch.no_grad():
         outputs = model(input_tensor)
         probs = torch.nn.functional.softmax(outputs[0], dim=0)
         top_prob, top_catid = torch.topk(probs, 1)
     model_confidence = float(top_prob.item())
     model_label = str(top_catid.item())  # class index, not a readable name
     # ------------------------------
     # 2. OCR extraction
     # ------------------------------
     img_np = np.array(image)
     gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
     text = pytesseract.image_to_string(gray)
     # ------------------------------
     # 3. Heuristic text scoring
     # ------------------------------
     # Lower-case the OCR text once rather than once per keyword.
     lowered_text = text.lower()
     keyword_matches = sum(
         1 for kw in REQUIRED_KEYWORDS if kw.lower() in lowered_text
     )
     ocr_score = keyword_matches / len(REQUIRED_KEYWORDS)
     # ------------------------------
     # 4. Combine model + OCR (unweighted mean)
     # ------------------------------
     combined_confidence = round((model_confidence + ocr_score) / 2, 4)
     # ------------------------------
     # 5. Return result
     # ------------------------------
     return {
         "model_label": model_label,
         "model_confidence": round(model_confidence, 4),
         "ocr_score": round(ocr_score, 4),
         "combined_confidence": combined_confidence,
         "text_preview": text[:300],
     }
     fn=verify_certificate,
     inputs=gr.Image(type="numpy", label="Upload Certificate Image"),
     outputs=gr.JSON(label="Verification Result"),
+    title="Certificate Verification AI 🧠",
+    description="Uploads a certificate image, checks for authenticity using a vision model and OCR keyword heuristics."
 )
 if __name__ == "__main__":